summaryrefslogtreecommitdiff
path: root/doc
diff options
context:
space:
mode:
Diffstat (limited to 'doc')
-rw-r--r--doc/.gitignore5
-rw-r--r--doc/Doxyfile.in18
-rw-r--r--doc/Makefile.am35
-rw-r--r--doc/Makefile.in481
-rwxr-xr-xdoc/release.sh.in4
-rw-r--r--doc/rfc1866.htm4446
-rw-r--r--doc/rfc3513.htm1579
-rw-r--r--doc/rfc3986.htm3539
-rw-r--r--doc/rfc3986_grammar_only.txt80
9 files changed, 9660 insertions, 527 deletions
diff --git a/doc/.gitignore b/doc/.gitignore
new file mode 100644
index 0000000..ba9a3b7
--- /dev/null
+++ b/doc/.gitignore
@@ -0,0 +1,5 @@
+/Doxyfile
+/html
+/release.sh
+/uriparser-*.qch
+/uriparser-*-doc.zip
diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in
index dd0926f..da4132f 100644
--- a/doc/Doxyfile.in
+++ b/doc/Doxyfile.in
@@ -1,12 +1,12 @@
# Doxyfile for Doxygen 1.5.7
# .qhp output
-GENERATE_QHP = yes
+GENERATE_QHP = @GENERATE_QHP@
QHP_NAMESPACE = "io.github.uriparser"
-QHP_VIRTUAL_FOLDER = "uriparser-@VERSION@"
+QHP_VIRTUAL_FOLDER = "uriparser-@PROJECT_VERSION@"
# .qch output
-QCH_FILE = "../@PACKAGE@-@VERSION@.qch"
+QCH_FILE = "../@PROJECT_NAME@-@PROJECT_VERSION@.qch"
QHG_LOCATION = "@QHG_LOCATION@"
@@ -19,11 +19,11 @@ PROJECT_NAME = "uriparser"
# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by double-quotes) that should identify the project for which the documentation is generated. This name is used in the title of most generated pages and in a few other places.
-PROJECT_NUMBER = "@VERSION@"
+PROJECT_NUMBER = "@PROJECT_VERSION@"
# The PROJECT_NUMBER tag can be used to enter a project or revision number. This could be handy for archiving the generated documentation or if some version control system is used.
-CHM_FILE = "..\@PACKAGE@-@VERSION@.chm"
+CHM_FILE = "..\@PROJECT_NAME@-@PROJECT_VERSION@.chm"
# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can be used to specify the file name of the resulting .chm file. You can add a path in front of the file if the result should not be written to the html output directory.
@@ -280,7 +280,7 @@ WARN_FORMAT = "WARNING: $text ($line, $file)"
###############################################################
-INPUT = @ac_abs_confdir@/include @ac_abs_confdir@/doc/Mainpage.txt
+INPUT = @CMAKE_CURRENT_SOURCE_DIR@/include @CMAKE_CURRENT_SOURCE_DIR@/doc/Mainpage.txt
# The INPUT tag is used to specify the files and/or directories that contain documented source files. You may enter file names like myfile.cpp or directories like /usr/src/myproject. Separate the files or directories with spaces.
#
# Note: If this tag is empty the current directory is searched.
@@ -346,7 +346,7 @@ RECURSIVE = YES
# The IMAGE_PATH tag can be used to specify one or more files or directories that contain images that are to be included in the documentation (see the \image command).
-INPUT_FILTER = "bash @ac_abs_confdir@/doc/preprocess.sh"
+INPUT_FILTER = "bash @CMAKE_CURRENT_SOURCE_DIR@/doc/preprocess.sh"
# The INPUT_FILTER tag can be used to specify a program that doxygen should invoke to filter for each input file. Doxygen will invoke the filter program by executing (via popen()) the command:
#
# <filter> <input-file>
@@ -497,7 +497,7 @@ GENERATE_HTMLHELP = @GENERATE_HTMLHELP@
# The HTML Help Workshop contains a compiler that can convert all HTML output generated by doxygen into a single compressed HTML file (.chm). Compressed HTML files are now used as the Windows 98 help format, and will replace the old Windows help format (.hlp) on all Windows platforms in the future. Compressed HTML files also contain an index, a table of contents, and you can search for words in the documentation. The HTML workshop also contains a viewer for compressed HTML files.
-HHC_LOCATION = ../hhc.exe
+HHC_LOCATION = "@HTML_HELP_COMPILER@"
# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can be used to specify the location (absolute path including file name) of the HTML help compiler (hhc.exe). If non empty doxygen will try to run the HTML help compiler on the generated index.hhp.
@@ -746,7 +746,7 @@ SEARCH_INCLUDES = YES
# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files in the INCLUDE_PATH (see below) will be searched if a #include is found.
-INCLUDE_PATH = @ac_abs_confdir@/include
+INCLUDE_PATH = @CMAKE_CURRENT_SOURCE_DIR@/include
# The INCLUDE_PATH tag can be used to specify one or more directories that contain include files that are not input files but should be processed by the preprocessor.
diff --git a/doc/Makefile.am b/doc/Makefile.am
deleted file mode 100644
index 012f0bc..0000000
--- a/doc/Makefile.am
+++ /dev/null
@@ -1,35 +0,0 @@
-## Build doc files
-all-local: html/index.html
-
-html/index.html: $(srcdir)/../include/uriparser/*.h \
- $(srcdir)/../src/*.c \
- $(srcdir)/../src/*.h \
- $(srcdir)/Mainpage.txt
- rm -Rf "$(builddir)/html"
- doxygen Doxyfile
- touch html/index.html
-
-
-## Clean doc files
-clean-local:
- rm -Rf "$(builddir)/html"
- rm -f *.qch
-
-
-## Clean configure files
-distclean-local:
- rm -f config.{log,status} release.sh Doxyfile
-
-
-## Install doc files
-install-data-local:
- $(MKDIR_P) "$(DESTDIR)$(docdir)/html/search" ## Didn't work with installdirs-local
- $(INSTALL_DATA) html/*.css html/*.html html/*.js html/*.md5 html/*.png "$(DESTDIR)$(docdir)/html/"
- $(INSTALL_DATA) html/search/* "$(DESTDIR)$(docdir)/html/search/"
- -$(INSTALL_DATA) *.qch "$(DESTDIR)$(docdir)/"
-
-
-## Uninstall doc files
-uninstall-local:
- rm -Rf "$(DESTDIR)$(docdir)/html"
- rm -f "$(DESTDIR)$(docdir)"/*.qch
diff --git a/doc/Makefile.in b/doc/Makefile.in
deleted file mode 100644
index 801fbe3..0000000
--- a/doc/Makefile.in
+++ /dev/null
@@ -1,481 +0,0 @@
-# Makefile.in generated by automake 1.15.1 from Makefile.am.
-# @configure_input@
-
-# Copyright (C) 1994-2017 Free Software Foundation, Inc.
-
-# This Makefile.in is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-# PARTICULAR PURPOSE.
-
-@SET_MAKE@
-VPATH = @srcdir@
-am__is_gnu_make = { \
- if test -z '$(MAKELEVEL)'; then \
- false; \
- elif test -n '$(MAKE_HOST)'; then \
- true; \
- elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
- true; \
- else \
- false; \
- fi; \
-}
-am__make_running_with_option = \
- case $${target_option-} in \
- ?) ;; \
- *) echo "am__make_running_with_option: internal error: invalid" \
- "target option '$${target_option-}' specified" >&2; \
- exit 1;; \
- esac; \
- has_opt=no; \
- sane_makeflags=$$MAKEFLAGS; \
- if $(am__is_gnu_make); then \
- sane_makeflags=$$MFLAGS; \
- else \
- case $$MAKEFLAGS in \
- *\\[\ \ ]*) \
- bs=\\; \
- sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
- | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
- esac; \
- fi; \
- skip_next=no; \
- strip_trailopt () \
- { \
- flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
- }; \
- for flg in $$sane_makeflags; do \
- test $$skip_next = yes && { skip_next=no; continue; }; \
- case $$flg in \
- *=*|--*) continue;; \
- -*I) strip_trailopt 'I'; skip_next=yes;; \
- -*I?*) strip_trailopt 'I';; \
- -*O) strip_trailopt 'O'; skip_next=yes;; \
- -*O?*) strip_trailopt 'O';; \
- -*l) strip_trailopt 'l'; skip_next=yes;; \
- -*l?*) strip_trailopt 'l';; \
- -[dEDm]) skip_next=yes;; \
- -[JT]) skip_next=yes;; \
- esac; \
- case $$flg in \
- *$$target_option*) has_opt=yes; break;; \
- esac; \
- done; \
- test $$has_opt = yes
-am__make_dryrun = (target_option=n; $(am__make_running_with_option))
-am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
-pkgdatadir = $(datadir)/@PACKAGE@
-pkgincludedir = $(includedir)/@PACKAGE@
-pkglibdir = $(libdir)/@PACKAGE@
-pkglibexecdir = $(libexecdir)/@PACKAGE@
-am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
-install_sh_DATA = $(install_sh) -c -m 644
-install_sh_PROGRAM = $(install_sh) -c
-install_sh_SCRIPT = $(install_sh) -c
-INSTALL_HEADER = $(INSTALL_DATA)
-transform = $(program_transform_name)
-NORMAL_INSTALL = :
-PRE_INSTALL = :
-POST_INSTALL = :
-NORMAL_UNINSTALL = :
-PRE_UNINSTALL = :
-POST_UNINSTALL = :
-build_triplet = @build@
-host_triplet = @host@
-subdir = doc
-ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/m4/libtool.m4 \
- $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
- $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
- $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac
-am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
- $(ACLOCAL_M4)
-DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
-mkinstalldirs = $(install_sh) -d
-CONFIG_HEADER = $(top_builddir)/config.h
-CONFIG_CLEAN_FILES = Doxyfile release.sh
-CONFIG_CLEAN_VPATH_FILES =
-AM_V_P = $(am__v_P_@AM_V@)
-am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
-am__v_P_0 = false
-am__v_P_1 = :
-AM_V_GEN = $(am__v_GEN_@AM_V@)
-am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
-am__v_GEN_0 = @echo " GEN " $@;
-am__v_GEN_1 =
-AM_V_at = $(am__v_at_@AM_V@)
-am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
-am__v_at_0 = @
-am__v_at_1 =
-SOURCES =
-DIST_SOURCES =
-am__can_run_installinfo = \
- case $$AM_UPDATE_INFO_DIR in \
- n|no|NO) false;; \
- *) (install-info --version) >/dev/null 2>&1;; \
- esac
-am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
-am__DIST_COMMON = $(srcdir)/Doxyfile.in $(srcdir)/Makefile.in \
- $(srcdir)/release.sh.in
-DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
-ACLOCAL = @ACLOCAL@
-AMTAR = @AMTAR@
-AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
-AR = @AR@
-AUTOCONF = @AUTOCONF@
-AUTOHEADER = @AUTOHEADER@
-AUTOMAKE = @AUTOMAKE@
-AWK = @AWK@
-CC = @CC@
-CCDEPMODE = @CCDEPMODE@
-CFLAGS = @CFLAGS@
-CPP = @CPP@
-CPPFLAGS = @CPPFLAGS@
-CXX = @CXX@
-CXXCPP = @CXXCPP@
-CXXDEPMODE = @CXXDEPMODE@
-CXXFLAGS = @CXXFLAGS@
-CYGPATH_W = @CYGPATH_W@
-DEFS = @DEFS@
-DEPDIR = @DEPDIR@
-DLLTOOL = @DLLTOOL@
-DOXY_CHECK = @DOXY_CHECK@
-DSYMUTIL = @DSYMUTIL@
-DUMPBIN = @DUMPBIN@
-ECHO_C = @ECHO_C@
-ECHO_N = @ECHO_N@
-ECHO_T = @ECHO_T@
-EGREP = @EGREP@
-EXEEXT = @EXEEXT@
-FGREP = @FGREP@
-GENERATE_HTMLHELP = @GENERATE_HTMLHELP@
-GRAPHVIZ_CHECK = @GRAPHVIZ_CHECK@
-GREP = @GREP@
-GTEST_CFLAGS = @GTEST_CFLAGS@
-GTEST_LIBS = @GTEST_LIBS@
-INSTALL = @INSTALL@
-INSTALL_DATA = @INSTALL_DATA@
-INSTALL_PROGRAM = @INSTALL_PROGRAM@
-INSTALL_SCRIPT = @INSTALL_SCRIPT@
-INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
-LD = @LD@
-LDFLAGS = @LDFLAGS@
-LIBOBJS = @LIBOBJS@
-LIBS = @LIBS@
-LIBTOOL = @LIBTOOL@
-LIPO = @LIPO@
-LN_S = @LN_S@
-LTLIBOBJS = @LTLIBOBJS@
-LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@
-MAKEINFO = @MAKEINFO@
-MANIFEST_TOOL = @MANIFEST_TOOL@
-MKDIR_P = @MKDIR_P@
-NM = @NM@
-NMEDIT = @NMEDIT@
-OBJDUMP = @OBJDUMP@
-OBJEXT = @OBJEXT@
-OTOOL = @OTOOL@
-OTOOL64 = @OTOOL64@
-PACKAGE = @PACKAGE@
-PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
-PACKAGE_NAME = @PACKAGE_NAME@
-PACKAGE_STRING = @PACKAGE_STRING@
-PACKAGE_TARNAME = @PACKAGE_TARNAME@
-PACKAGE_URL = @PACKAGE_URL@
-PACKAGE_VERSION = @PACKAGE_VERSION@
-PATH_SEPARATOR = @PATH_SEPARATOR@
-PKG_CONFIG = @PKG_CONFIG@
-PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
-PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
-QHG_LOCATION = @QHG_LOCATION@
-RANLIB = @RANLIB@
-SED = @SED@
-SET_MAKE = @SET_MAKE@
-SHELL = @SHELL@
-STRIP = @STRIP@
-VERSION = @VERSION@
-abs_builddir = @abs_builddir@
-abs_srcdir = @abs_srcdir@
-abs_top_builddir = @abs_top_builddir@
-abs_top_srcdir = @abs_top_srcdir@
-ac_abs_confdir = @ac_abs_confdir@
-ac_ct_AR = @ac_ct_AR@
-ac_ct_CC = @ac_ct_CC@
-ac_ct_CXX = @ac_ct_CXX@
-ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
-am__include = @am__include@
-am__leading_dot = @am__leading_dot@
-am__quote = @am__quote@
-am__tar = @am__tar@
-am__untar = @am__untar@
-bindir = @bindir@
-build = @build@
-build_alias = @build_alias@
-build_cpu = @build_cpu@
-build_os = @build_os@
-build_vendor = @build_vendor@
-builddir = @builddir@
-datadir = @datadir@
-datarootdir = @datarootdir@
-docdir = @docdir@
-dvidir = @dvidir@
-exec_prefix = @exec_prefix@
-host = @host@
-host_alias = @host_alias@
-host_cpu = @host_cpu@
-host_os = @host_os@
-host_vendor = @host_vendor@
-htmldir = @htmldir@
-includedir = @includedir@
-infodir = @infodir@
-install_sh = @install_sh@
-libdir = @libdir@
-libexecdir = @libexecdir@
-localedir = @localedir@
-localstatedir = @localstatedir@
-mandir = @mandir@
-mkdir_p = @mkdir_p@
-oldincludedir = @oldincludedir@
-pdfdir = @pdfdir@
-prefix = @prefix@
-program_transform_name = @program_transform_name@
-psdir = @psdir@
-sbindir = @sbindir@
-sharedstatedir = @sharedstatedir@
-srcdir = @srcdir@
-sysconfdir = @sysconfdir@
-target_alias = @target_alias@
-top_build_prefix = @top_build_prefix@
-top_builddir = @top_builddir@
-top_srcdir = @top_srcdir@
-all: all-am
-
-.SUFFIXES:
-$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
- @for dep in $?; do \
- case '$(am__configure_deps)' in \
- *$$dep*) \
- ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
- && { if test -f $@; then exit 0; else break; fi; }; \
- exit 1;; \
- esac; \
- done; \
- echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign doc/Makefile'; \
- $(am__cd) $(top_srcdir) && \
- $(AUTOMAKE) --foreign doc/Makefile
-Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
- @case '$?' in \
- *config.status*) \
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
- *) \
- echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
- cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
- esac;
-
-$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-
-$(top_srcdir)/configure: $(am__configure_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): $(am__aclocal_m4_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(am__aclocal_m4_deps):
-Doxyfile: $(top_builddir)/config.status $(srcdir)/Doxyfile.in
- cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
-release.sh: $(top_builddir)/config.status $(srcdir)/release.sh.in
- cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
-
-mostlyclean-libtool:
- -rm -f *.lo
-
-clean-libtool:
- -rm -rf .libs _libs
-tags TAGS:
-
-ctags CTAGS:
-
-cscope cscopelist:
-
-
-distdir: $(DISTFILES)
- @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- list='$(DISTFILES)'; \
- dist_files=`for file in $$list; do echo $$file; done | \
- sed -e "s|^$$srcdirstrip/||;t" \
- -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
- case $$dist_files in \
- */*) $(MKDIR_P) `echo "$$dist_files" | \
- sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
- sort -u` ;; \
- esac; \
- for file in $$dist_files; do \
- if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
- if test -d $$d/$$file; then \
- dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
- if test -d "$(distdir)/$$file"; then \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
- cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
- else \
- test -f "$(distdir)/$$file" \
- || cp -p $$d/$$file "$(distdir)/$$file" \
- || exit 1; \
- fi; \
- done
-check-am: all-am
-check: check-am
-all-am: Makefile all-local
-installdirs:
-install: install-am
-install-exec: install-exec-am
-install-data: install-data-am
-uninstall: uninstall-am
-
-install-am: all-am
- @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
-
-installcheck: installcheck-am
-install-strip:
- if test -z '$(STRIP)'; then \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- install; \
- else \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
- fi
-mostlyclean-generic:
-
-clean-generic:
-
-distclean-generic:
- -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
- -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
-
-maintainer-clean-generic:
- @echo "This command is intended for maintainers to use"
- @echo "it deletes files that may require special tools to rebuild."
-clean: clean-am
-
-clean-am: clean-generic clean-libtool clean-local mostlyclean-am
-
-distclean: distclean-am
- -rm -f Makefile
-distclean-am: clean-am distclean-generic distclean-local
-
-dvi: dvi-am
-
-dvi-am:
-
-html: html-am
-
-html-am:
-
-info: info-am
-
-info-am:
-
-install-data-am: install-data-local
-
-install-dvi: install-dvi-am
-
-install-dvi-am:
-
-install-exec-am:
-
-install-html: install-html-am
-
-install-html-am:
-
-install-info: install-info-am
-
-install-info-am:
-
-install-man:
-
-install-pdf: install-pdf-am
-
-install-pdf-am:
-
-install-ps: install-ps-am
-
-install-ps-am:
-
-installcheck-am:
-
-maintainer-clean: maintainer-clean-am
- -rm -f Makefile
-maintainer-clean-am: distclean-am maintainer-clean-generic
-
-mostlyclean: mostlyclean-am
-
-mostlyclean-am: mostlyclean-generic mostlyclean-libtool
-
-pdf: pdf-am
-
-pdf-am:
-
-ps: ps-am
-
-ps-am:
-
-uninstall-am: uninstall-local
-
-.MAKE: install-am install-strip
-
-.PHONY: all all-am all-local check check-am clean clean-generic \
- clean-libtool clean-local cscopelist-am ctags-am distclean \
- distclean-generic distclean-libtool distclean-local distdir \
- dvi dvi-am html html-am info info-am install install-am \
- install-data install-data-am install-data-local install-dvi \
- install-dvi-am install-exec install-exec-am install-html \
- install-html-am install-info install-info-am install-man \
- install-pdf install-pdf-am install-ps install-ps-am \
- install-strip installcheck installcheck-am installdirs \
- maintainer-clean maintainer-clean-generic mostlyclean \
- mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
- tags-am uninstall uninstall-am uninstall-local
-
-.PRECIOUS: Makefile
-
-all-local: html/index.html
-
-html/index.html: $(srcdir)/../include/uriparser/*.h \
- $(srcdir)/../src/*.c \
- $(srcdir)/../src/*.h \
- $(srcdir)/Mainpage.txt
- rm -Rf "$(builddir)/html"
- doxygen Doxyfile
- touch html/index.html
-
-clean-local:
- rm -Rf "$(builddir)/html"
- rm -f *.qch
-
-distclean-local:
- rm -f config.{log,status} release.sh Doxyfile
-
-install-data-local:
- $(MKDIR_P) "$(DESTDIR)$(docdir)/html/search" ## Didn't work with installdirs-local
- $(INSTALL_DATA) html/*.css html/*.html html/*.js html/*.md5 html/*.png "$(DESTDIR)$(docdir)/html/"
- $(INSTALL_DATA) html/search/* "$(DESTDIR)$(docdir)/html/search/"
- -$(INSTALL_DATA) *.qch "$(DESTDIR)$(docdir)/"
-
-uninstall-local:
- rm -Rf "$(DESTDIR)$(docdir)/html"
- rm -f "$(DESTDIR)$(docdir)"/*.qch
-
-# Tell versions [3.59,3.63) of GNU make to not export all variables.
-# Otherwise a system limit (for SysV at least) may be exceeded.
-.NOEXPORT:
diff --git a/doc/release.sh.in b/doc/release.sh.in
index 70a52b4..6a7d09a 100755
--- a/doc/release.sh.in
+++ b/doc/release.sh.in
@@ -2,14 +2,14 @@
(
cd $(dirname $(which "$0")) || exit 1
-distdir="@PACKAGE@-@VERSION@-doc"
+distdir="@PROJECT_NAME@-@PROJECT_VERSION@-doc"
[ -z $MAKE ] && MAKE=make
# Clean up
rm -Rf "${distdir}" "${distdir}.zip"
# Generate
-"${MAKE}" || exit 1
+"${MAKE}" -C .. doc || exit 1
# Copy
mkdir -p "${distdir}/html/search"
diff --git a/doc/rfc1866.htm b/doc/rfc1866.htm
new file mode 100644
index 0000000..108a958
--- /dev/null
+++ b/doc/rfc1866.htm
@@ -0,0 +1,4446 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html lang="en" xml:lang="en">
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=us-ascii" />
+ <meta name="robots" content="index,follow" />
+ <meta name="creator" content="rfcmarkup version 1.60" />
+ <link rel="icon" href="/images/rfc.png" type="image/png" />
+ <link rel="shortcut icon" href="/images/rfc.png" type="image/png" />
+ <title>RFC 1866 - Hypertext Markup Language - 2.0</title>
+
+ <style type="text/css">
+ body {
+ margin: 0px 8px;
+ font-size: 1em;
+ }
+ h1, h2, h3, h4, h5, h6, .h1, .h2, .h3, .h4, .h5, .h6 {
+ font-weight: bold;
+ line-height: 0pt;
+ display: inline;
+ white-space: pre;
+ font-family: monospace;
+ font-size: 1em;
+ font-weight: bold;
+ }
+ pre {
+ font-size: 1em;
+ }
+ .pre {
+ white-space: pre;
+ font-family: monospace;
+ }
+ .header{
+ font-weight: bold;
+ }
+ .invisible {
+ text-decoration: none;
+ color: white;
+ }
+ @media print {
+ body {
+ font-size: 10.5pt;
+ }
+ h1, h2, h3, h4, h5, h6 {
+ font-size: 10.5pt;
+ }
+
+ a:link, a:visited {
+ color: inherit;
+ text-decoration: none;
+ }
+ .break {
+ page-break-before: always;
+ }
+ .noprint {
+ display: none;
+ }
+ }
+ @media screen {
+ .grey, .grey a:link, .grey a:visited {
+ color: #777;
+ }
+ .docinfo {
+ background-color: #EEE;
+ }
+ .top {
+ border-top: 2px solid #EEE;
+ }
+ .bgwhite { background-color: white; }
+ .bgred { background-color: #F44; }
+ .bggrey { background-color: #666; }
+ .bgbrown { background-color: #840; }
+ .bgorange { background-color: #FA0; }
+ .bgyellow { background-color: #EE0; }
+ .bgmagenta{ background-color: #F4F; }
+ .bgblue { background-color: #66F; }
+ .bgcyan { background-color: #4DD; }
+ .bggreen { background-color: #4F4; }
+
+ .legend { font-size: 90%; }
+ .cplate { font-size: 70%; border: solid grey 1px; }
+ }
+ </style>
+
+ <script type="text/javascript"><!--
+ function addHeaderTags() {
+ var spans = document.getElementsByTagName("span");
+ for (var i=0; i < spans.length; i++) {
+ var elem = spans[i];
+ if (elem) {
+ var level = elem.getAttribute("class");
+ if (level == "h1" || level == "h2" || level == "h3" || level == "h4" || level == "h5" || level == "h6") {
+ elem.innerHTML = "<"+level+">"+elem.innerHTML+"</"+level+">";
+ }
+ }
+ }
+ }
+ var legend_html = "Colour legend:<br /> <table> <tr><td>Unknown:</td> <td><span class='cplate bgwhite'>&nbsp;&nbsp;&nbsp;&nbsp;</span></td></tr> <tr><td>Draft:</td> <td><span class='cplate bgred'>&nbsp;&nbsp;&nbsp;&nbsp;</span></td></tr> <tr><td>Informational:</td> <td><span class='cplate bgorange'>&nbsp;&nbsp;&nbsp;&nbsp;</span></td></tr> <tr><td>Experimental:</td> <td><span class='cplate bgyellow'>&nbsp;&nbsp;&nbsp;&nbsp;</span></td></tr> <tr><td>Best Common Practice:</td><td><span class='cplate bgmagenta'>&nbsp;&nbsp;&nbsp;&nbsp;</span></td></tr> <tr><td>Proposed Standard:</td><td><span class='cplate bgblue'>&nbsp;&nbsp;&nbsp;&nbsp;</span></td></tr> <tr><td>Draft Standard:</td> <td><span class='cplate bgcyan'>&nbsp;&nbsp;&nbsp;&nbsp;</span></td></tr> <tr><td>Standard:</td> <td><span class='cplate bggreen'>&nbsp;&nbsp;&nbsp;&nbsp;</span></td></tr> <tr><td>Historic:</td> <td><span class='cplate bggrey'>&nbsp;&nbsp;&nbsp;&nbsp;</span></td></tr> <tr><td>Obsolete:</td> <td><span class='cplate bgbrown'>&nbsp;&nbsp;&nbsp;&nbsp;</span></td></tr> </table>";
+ function showElem(id) {
+ var elem = document.getElementById(id);
+ elem.innerHTML = eval(id+"_html");
+ elem.style.visibility='visible';
+ }
+ function hideElem(id) {
+ var elem = document.getElementById(id);
+ elem.style.visibility='hidden';
+ elem.innerHTML = "";
+ }
+ // -->
+ </script>
+</head>
+<body onload="addHeaderTags()">
+ <div style="height: 8px;">
+ <div onmouseover="this.style.cursor='pointer';"
+ onclick="showElem('legend');"
+ onmouseout="hideElem('legend')"
+ style="height: 6px; position: absolute;"
+ class="pre noprint docinfo bgbrown"
+ title="Click for colour legend." > </div>
+ <div id="legend"
+ class="docinfo noprint pre legend"
+ style="position:absolute; top: 4px; left: 4ex; visibility:hidden; background-color: white; padding: 4px 9px 5px 7px; border: solid #345 1px; "
+ onmouseover="showElem('legend');"
+ onmouseout="hideElem('legend');">
+ </div>
+ </div>
+<span class="pre noprint docinfo top">[<a href="../html/" title="Document search and retrieval page">RFCs/IDs</a>] [<a href="/rfc/rfc1866.txt" title="Plaintext version of this document">Plain Text</a>] [From <a href="draft-ietf-html-spec">draft-ietf-html-spec</a>] </span><br />
+<span class="pre noprint docinfo"> </span><br />
+<span class="pre noprint docinfo">Obsoleted by: <a href="./rfc2854">2854</a> HISTORIC</span><br />
+<span class="pre noprint docinfo"> </span><br />
+<pre>
+Network Working Group T. Berners-Lee
+Request for Comments: 1866 MIT/W3C
+Category: Standards Track D. Connolly
+ November 1995
+
+
+ <span class="h1">Hypertext Markup Language - 2.0</span>
+
+Status of this Memo
+
+ This document specifies an Internet standards track protocol for the
+ Internet community, and requests discussion and suggestions for
+ improvements. Please refer to the current edition of the "Internet
+ Official Protocol Standards" (STD 1) for the standardization state
+ and status of this protocol. Distribution of this memo is unlimited.
+
+Abstract
+
+ The Hypertext Markup Language (HTML) is a simple markup language used
+ to create hypertext documents that are platform independent. HTML
+ documents are SGML documents with generic semantics that are
+ appropriate for representing information from a wide range of
+ domains. HTML markup can represent hypertext news, mail,
+ documentation, and hypermedia; menus of options; database query
+ results; simple structured documents with in-lined graphics; and
+ hypertext views of existing bodies of information.
+
+ HTML has been in use by the World Wide Web (WWW) global information
+ initiative since 1990. This specification roughly corresponds to the
+ capabilities of HTML in common use prior to June 1994. HTML is an
+ application of ISO Standard 8879:1986 Information Processing Text and
+ Office Systems; Standard Generalized Markup Language (SGML).
+
+ The "text/html" Internet Media Type (<a href="./rfc1590">RFC 1590</a>) and MIME Content Type
+ (<a href="./rfc1521">RFC 1521</a>) is defined by this specification.
+
+Table of Contents
+
+ <a href="#section-1">1</a>. Introduction ........................................... <a href="#page-2">2</a>
+ <a href="#section-1.1">1.1</a> Scope .................................................. <a href="#page-3">3</a>
+ <a href="#section-1.2">1.2</a> Conformance ............................................ <a href="#page-3">3</a>
+ <a href="#section-2">2</a>. Terms .................................................. <a href="#page-6">6</a>
+ <a href="#section-3">3</a>. HTML as an Application of SGML .........................<a href="#page-10">10</a>
+ <a href="#section-3.1">3.1</a> SGML Documents .........................................<a href="#page-10">10</a>
+ <a href="#section-3.2">3.2</a> HTML Lexical Syntax ................................... <a href="#page-12">12</a>
+ <a href="#section-3.3">3.3</a> HTML Public Text Identifiers .......................... <a href="#page-17">17</a>
+ <a href="#section-3.4">3.4</a> Example HTML Document ................................. <a href="#page-17">17</a>
+ <a href="#section-4">4</a>. HTML as an Internet Media Type ........................ <a href="#page-18">18</a>
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 1]</span>
+<a name="page-2" id="page-2" href="#page-2" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ <a href="#section-4.1">4.1</a> text/html media type .................................. <a href="#page-18">18</a>
+ <a href="#section-4.2">4.2</a> HTML Document Representation .......................... <a href="#page-19">19</a>
+ <a href="#section-5">5</a>. Document Structure .................................... <a href="#page-20">20</a>
+ <a href="#section-5.1">5.1</a> Document Element: HTML ................................ <a href="#page-21">21</a>
+ <a href="#section-5.2">5.2</a> Head: HEAD ............................................ <a href="#page-21">21</a>
+ <a href="#section-5.3">5.3</a> Body: BODY ............................................ <a href="#page-24">24</a>
+ <a href="#section-5.4">5.4</a> Headings: H1 ... H6 ................................... <a href="#page-24">24</a>
+ <a href="#section-5.5">5.5</a> Block Structuring Elements ............................ <a href="#page-25">25</a>
+ <a href="#section-5.6">5.6</a> List Elements ......................................... <a href="#page-28">28</a>
+ <a href="#section-5.7">5.7</a> Phrase Markup ......................................... <a href="#page-30">30</a>
+ <a href="#section-5.8">5.8</a> Line Break: BR ........................................ <a href="#page-34">34</a>
+ <a href="#section-5.9">5.9</a> Horizontal Rule: HR ................................... <a href="#page-34">34</a>
+ <a href="#section-5.10">5.10</a> Image: IMG ............................................ <a href="#page-34">34</a>
+ <a href="#section-6">6</a>. Characters, Words, and Paragraphs ..................... <a href="#page-35">35</a>
+ <a href="#section-6.1">6.1</a> The HTML Document Character Set ....................... <a href="#page-36">36</a>
+ <a href="#section-7">7</a>. Hyperlinks ............................................ <a href="#page-36">36</a>
+ <a href="#section-7.1">7.1</a> Accessing Resources ................................... <a href="#page-37">37</a>
+ <a href="#section-7.2">7.2</a> Activation of Hyperlinks .............................. <a href="#page-38">38</a>
+ <a href="#section-7.3">7.3</a> Simultaneous Presentation of Image Resources .......... <a href="#page-38">38</a>
+ <a href="#section-7.4">7.4</a> Fragment Identifiers .................................. <a href="#page-38">38</a>
+ <a href="#section-7.5">7.5</a> Queries and Indexes ................................... <a href="#page-39">39</a>
+ <a href="#section-7.6">7.6</a> Image Maps ............................................ <a href="#page-39">39</a>
+ <a href="#section-8">8</a>. Forms ................................................. <a href="#page-40">40</a>
+ <a href="#section-8.1">8.1</a> Form Elements ......................................... <a href="#page-40">40</a>
+ <a href="#section-8.2">8.2</a> Form Submission ....................................... <a href="#page-45">45</a>
+ <a href="#section-9">9</a>. HTML Public Text ...................................... <a href="#page-49">49</a>
+ <a href="#section-9.1">9.1</a> HTML DTD .............................................. <a href="#page-49">49</a>
+ <a href="#section-9.2">9.2</a> Strict HTML DTD ....................................... <a href="#page-61">61</a>
+ <a href="#section-9.3">9.3</a> Level 1 HTML DTD ...................................... <a href="#page-62">62</a>
+ <a href="#section-9.4">9.4</a> Strict Level 1 HTML DTD ............................... <a href="#page-63">63</a>
+ <a href="#section-9.5">9.5</a> SGML Declaration for HTML ............................. <a href="#page-64">64</a>
+ <a href="#section-9.6">9.6</a> Sample SGML Open Entity Catalog for HTML .............. <a href="#page-65">65</a>
+ <a href="#section-9.7">9.7</a> Character Entity Sets ................................. <a href="#page-66">66</a>
+ <a href="#section-10">10</a>. Security Considerations ............................... <a href="#page-69">69</a>
+ <a href="#section-11">11</a>. References ............................................ <a href="#page-69">69</a>
+ <a href="#section-12">12</a>. Acknowledgments ....................................... <a href="#page-71">71</a>
+ <a href="#section-12.1">12.1</a> Authors' Addresses .................................... <a href="#page-71">71</a>
+ <a href="#section-13">13</a>. The HTML Coded Character Set .......................... <a href="#page-72">72</a>
+ <a href="#section-14">14</a>. Proposed Entities ..................................... <a href="#page-75">75</a>
+
+<span class="h2"><a name="section-1">1</a>. Introduction</span>
+
+ The HyperText Markup Language (HTML) is a simple data format used to
+ create hypertext documents that are portable from one platform to
+ another. HTML documents are SGML documents with generic semantics
+ that are appropriate for representing information from a wide range
+ of domains.
+
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 2]</span>
+<a name="page-3" id="page-3" href="#page-3" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ As HTML is an application of SGML, this specification assumes a
+ working knowledge of [<a href="#ref-SGML">SGML</a>].
+
+<span class="h3"><a name="section-1.1">1.1</a>. Scope</span>
+
+ HTML has been in use by the World-Wide Web (WWW) global information
+ initiative since 1990. Previously, informal documentation on HTML has
+ been available from a number of sources on the Internet. This
+ specification brings together, clarifies, and formalizes a set of
+ features that roughly corresponds to the capabilities of HTML in
+ common use prior to June 1994. A number of new features to HTML are
+ being proposed and experimented with in the Internet community.
+
+ This document thus defines a HTML 2.0 (to distinguish it from the
+ previous informal specifications). Future (generally upwardly
+ compatible) versions of HTML with new features will be released with
+ higher version numbers.
+
+ HTML is an application of ISO Standard 8879:1986, "Information
+ Processing Text and Office Systems; Standard Generalized Markup
+ Language" (SGML). The HTML Document Type Definition (DTD) is a formal
+ definition of the HTML syntax in terms of SGML.
+
+ This specification also defines HTML as an Internet Media
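+ Type[<a href="#ref-IMEDIA" title='"Media Type Registration Procedure"'>IMEDIA</a>] and MIME Content Type[<a href="#ref-MIME" title='"MIME (Multipurpose Internet Mail Extensions) Part One: Mechanisms for Specifying and Describing the Format of Internet Message Bodies"'>MIME</a>] called `text/html'. As such,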
+ it defines the semantics of the HTML syntax and how that syntax
+ should be interpreted by user agents.
+
+<span class="h3"><a name="section-1.2">1.2</a>. Conformance</span>
+
+ This specification governs the syntax of HTML documents and aspects
+ of the behavior of HTML user agents.
+
+<span class="h4"><a name="section-1.2.1">1.2.1</a>. Documents</span>
+
+ A document is a conforming HTML document if:
+
+ * It is a conforming SGML document, and it conforms to the
+ HTML DTD (see 9.1, "HTML DTD").
+
+ NOTE - There are a number of syntactic idioms that
+ are not supported or are supported inconsistently in
+ some historical user agent implementations. These
+ idioms are identified in notes like this throughout
+ this specification.
+
+ * It conforms to the application conventions in this
+ specification. For example, the value of the HREF attribute
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 3]</span>
+<a name="page-4" id="page-4" href="#page-4" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ of the &lt;A&gt; element must conform to the URI syntax.
+
+ * Its document character set includes [<a href="#ref-ISO-8859-1">ISO-8859-1</a>] and
+ agrees with [<a href="#ref-ISO-10646">ISO-10646</a>]; that is, each code position listed
+ in 13, "The HTML Coded Character Set" is included, and each
+ code position in the document character set is mapped to the
+ same character as [<a href="#ref-ISO-10646">ISO-10646</a>] designates for that code
+ position.
+
+ NOTE - The document character set is somewhat
+ independent of the character encoding scheme used to
+ represent a document. For example, the `ISO-2022-JP'
+ character encoding scheme can be used for HTML
+ documents, since its repertoire is a subset of the
+ [<a href="#ref-ISO-10646">ISO-10646</a>] repertoire. The critical distinction is
+ that numeric character references agree with
+ [<a href="#ref-ISO-10646">ISO-10646</a>] regardless of how the document is
+ encoded.
+
+<span class="h4"><a name="section-1.2.2">1.2.2</a>. Feature Test Entities</span>
+
+ The HTML DTD defines a standard HTML document type and several
+ variations, by way of feature test entities. Feature test entities
+ are declarations in the HTML DTD that control the inclusion or
+ exclusion of portions of the DTD.
+
+ HTML.Recommended
+ Certain features of the language are necessary for
+ compatibility with widespread usage, but they may
+ compromise the structural integrity of a document. This
+ feature test entity selects a more prescriptive document
+ type definition that eliminates those features. It is
+ set to `IGNORE' by default.
+
+ For example, in order to preserve the structure of a
+ document, an editing user agent may translate HTML
+ documents to the recommended subset, or it may require
+ that the documents be in the recommended subset for
+ import.
+
+ HTML.Deprecated
+ Certain features of the language are necessary for
+ compatibility with earlier versions of the
+ specification, but they tend to be used and implemented
+ inconsistently, and their use is deprecated. This
+ feature test entity enables a document type definition
+ that allows these features. It is set to `INCLUDE' by
+ default.
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 4]</span>
+<a name="page-5" id="page-5" href="#page-5" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ Documents generated by translation software or editing
+ software should not contain deprecated idioms.
+
+<span class="h4"><a name="section-1.2.3">1.2.3</a>. User Agents</span>
+
+ An HTML user agent conforms to this specification if:
+
+ * It parses the characters of an HTML document into data
+ characters and markup according to [<a href="#ref-SGML">SGML</a>].
+
+ NOTE - In the interest of robustness and
+ extensibility, there are a number of widely deployed
+ conventions for handling non-conforming documents.
+ See 4.2.1, "Undeclared Markup Error Handling" for
+ details.
+
+ * It supports the `ISO-8859-1' character encoding scheme and
+ processes each character in the ISO Latin Alphabet No. 1 as
+ specified in 6.1, "The HTML Document Character Set".
+
+ NOTE - To support non-western writing systems, HTML
+ user agents are encouraged to support
+ `ISO-10646-UCS-2' or similar character encoding
+ schemes and as much of the character repertoire of
+ [<a href="#ref-ISO-10646">ISO-10646</a>] as is practical.
+
+ * It behaves identically for documents whose parsed token
+ sequences are identical.
+
+ For example, comments and the whitespace in tags disappear
+ during tokenization, and hence they do not influence the
+ behavior of conforming user agents.
+
+ * It allows the user to traverse (or at least attempt to
+ traverse, resources permitting) all hyperlinks from &lt;A&gt;
+ elements in an HTML document.
+
+ An HTML user agent is a level 2 user agent if, additionally:
+
+ * It allows the user to express all form field values
+ specified in an HTML document and to (attempt to) submit the
+ values as requests to information services.
+
+
+
+
+
+
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 5]</span>
+<a name="page-6" id="page-6" href="#page-6" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+<span class="h2"><a name="section-2">2</a>. Terms</span>
+
+ absolute URI
+ a URI in absolute form; for example, as per [<a href="#ref-URL" title='"Uniform Resource Locators (URL)"'>URL</a>]
+
+ anchor
+ one of two ends of a hyperlink; typically, a phrase
+ marked as an &lt;A&gt; element.
+
+ base URI
+ an absolute URI used in combination with a relative URI
+ to determine another absolute URI.
+
+ character
+ An atom of information, for example a letter or a digit.
+ Graphic characters have associated glyphs, whereas
+ control characters have associated processing semantics.
+
+ character encoding
+ scheme
+ A function whose domain is the set of sequences of
+ octets, and whose range is the set of sequences of
+ characters from a character repertoire; that is, a
+ sequence of octets and a character encoding scheme
+ determine a sequence of characters.
+
+ character repertoire
+ A finite set of characters; e.g. the range of a coded
+ character set.
+
+ code position
+ An integer. A coded character set and a code position
+ from its domain determine a character.
+
+ coded character set
+ A function whose domain is a subset of the integers and
+ whose range is a character repertoire. That is, for some
+ set of integers (usually of the form {0, 1, 2, ..., N}
+ ), a coded character set and an integer in that set
+ determine a character. Conversely, a character and a
+ coded character set determine the character's code
+ position (or, in rare cases, a few code positions).
+
+ conforming HTML user
+ agent
+ A user agent that conforms to this specification in its
+ processing of the Internet Media Type `text/html'.
+
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 6]</span>
+<a name="page-7" id="page-7" href="#page-7" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ data character
+ Characters other than markup, which make up the content
+ of elements.
+
+ document character set
+ a coded character set whose range includes all
+ characters used in a document. Every SGML document has
+ exactly one document character set. Numeric character
+ references are resolved via the document character set.
+
+ DTD
+ document type definition. Rules that apply SGML to the
+ markup of documents of a particular type, including a
+ set of element and entity declarations. [<a href="#ref-SGML">SGML</a>]
+
+ element
+ A component of the hierarchical structure defined by a
+ document type definition; it is identified in a document
+ instance by descriptive markup, usually a start-tag and
+ end-tag. [<a href="#ref-SGML">SGML</a>]
+
+ end-tag
+ Descriptive markup that identifies the end of an
+ element. [<a href="#ref-SGML">SGML</a>]
+
+ entity
+ data with an associated notation or interpretation; for
+ example, a sequence of octets associated with an
+ Internet Media Type. [<a href="#ref-SGML">SGML</a>]
+
+ fragment identifier
+ the portion of an HREF attribute value following the `#'
+ character which modifies the presentation of the
+ destination of a hyperlink.
+
+ form data set
+ a sequence of name/value pairs; the names are given by
+ an HTML document and the values are given by a user.
+
+ HTML document
+ An SGML document conforming to this document type
+ definition.
+
+ hyperlink
+ a relationship between two anchors, called the head and
+ the tail. The link goes from the tail to the head. The
+ head and tail are also known as destination and source,
+ respectively.
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 7]</span>
+<a name="page-8" id="page-8" href="#page-8" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ markup
+ Syntactically delimited characters added to the data of
+ a document to represent its structure. There are four
+ different kinds of markup: descriptive markup (tags),
+ references, markup declarations, and processing
+ instructions. [<a href="#ref-SGML">SGML</a>]
+
+ may
+ A document or user interface is conforming whether this
+ statement applies or not.
+
+ media type
+ an Internet Media Type, as per [<a href="#ref-IMEDIA" title='"Media Type Registration Procedure"'>IMEDIA</a>].
+
+ message entity
+ a head and body. The head is a collection of name/value
+ fields, and the body is a sequence of octets. The head
+ defines the content type and content transfer encoding
+ of the body. [<a href="#ref-MIME" title='"MIME (Multipurpose Internet Mail Extensions) Part One: Mechanisms for Specifying and Describing the Format of Internet Message Bodies"'>MIME</a>]
+
+ minimally conforming
+ HTML user agent
+ A user agent that conforms to this specification except
+ for form processing. It may only process level 1 HTML
+ documents.
+
+ must
+ Documents or user agents in conflict with this statement
+ are not conforming.
+
+ numeric character
+ reference
+ markup that refers to a character by its code position
+ in the document character set.
+
+ SGML document
+ A sequence of characters organized physically as a set
+ of entities and logically into a hierarchy of elements.
+ An SGML document consists of data characters and markup;
+ the markup describes the structure of the information
+ and an instance of that structure. [<a href="#ref-SGML">SGML</a>]
+
+ shall
+ If a document or user agent conflicts with this
+ statement, it does not conform to this specification.
+
+
+
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 8]</span>
+<a name="page-9" id="page-9" href="#page-9" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ should
+ If a document or user agent conflicts with this
+ statement, undesirable results may occur in practice
+ even though it conforms to this specification.
+
+ start-tag
+ Descriptive markup that identifies the start of an
+ element and specifies its generic identifier and
+ attributes. [<a href="#ref-SGML">SGML</a>]
+
+ syntax-reference
+ character set
+ A coded character set whose range includes all
+ characters used for markup; e.g. name characters and
+ delimiter characters.
+
+ tag
+ Markup that delimits an element. A tag includes a name
+ which refers to an element declaration in the DTD, and
+ may include attributes. [<a href="#ref-SGML">SGML</a>]
+
+ text entity
+ A finite sequence of characters. A text entity typically
+ takes the form of a sequence of octets with some
+ associated character encoding scheme, transmitted over
+ the network or stored in a file. [<a href="#ref-SGML">SGML</a>]
+
+ typical
+ Typical processing is described for many elements. This
+ is not a mandatory part of the specification but is
+ given as guidance for designers and to help explain the
+ uses for which the elements were intended.
+
+ URI
+ A Uniform Resource Identifier is a formatted string that
+ serves as an identifier for a resource, typically on the
+ Internet. URIs are used in HTML to identify the anchors
+ of hyperlinks. URIs in common practice include Uniform
+ Resource Locators (URLs)[<a href="#ref-URL" title='"Uniform Resource Locators (URL)"'>URL</a>] and Relative URLs
+ [<a href="#ref-RELURL" title='"Relative Uniform Resource Locators"'>RELURL</a>].
+
+ user agent
+ A component of a distributed system that presents an
+ interface and processes requests on behalf of a user;
+ for example, a www browser or a mail user agent.
+
+
+
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 9]</span>
+<a name="page-10" id="page-10" href="#page-10" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ WWW
+ The World-Wide Web is a hypertext-based, distributed
+ information system created by researchers at CERN in
+ Switzerland. &lt;URL:http://www.w3.org/&gt;
+
+<span class="h2"><a name="section-3">3</a>. HTML as an Application of SGML</span>
+
+ HTML is an application of ISO 8879:1986 -- Standard Generalized
+ Markup Language (SGML). SGML is a system for defining structured
+ document types and markup languages to represent instances of those
+ document types[<a href="#ref-SGML">SGML</a>]. The public text -- DTD and SGML declaration --
+ of the HTML document type definition are provided in 9, "HTML Public
+ Text".
+
+ The term "HTML" refers to both the document type defined here and the
+ markup language for representing instances of this document type.
+
+<span class="h3"><a name="section-3.1">3.1</a>. SGML Documents</span>
+
+ An HTML document is an SGML document; that is, a sequence of
+ characters organized physically into a set of entities, and logically
+ as a hierarchy of elements.
+
+ In the SGML specification, the first production of the SGML syntax
+ grammar separates an SGML document into three parts: an SGML
+ declaration, a prologue, and an instance. For the purposes of this
+ specification, the prologue is a DTD. This DTD describes another
+ grammar: the start symbol is given in the doctype declaration, the
+ terminals are data characters and tags, and the productions are
+ determined by the element declarations. The instance must conform to
+ the DTD, that is, it must be in the language defined by this grammar.
+
+ The SGML declaration determines the lexicon of the grammar. It
+ specifies the document character set, which determines a character
+ repertoire that contains all characters that occur in all text
+ entities in the document, and the code positions associated with
+ those characters.
+
+ The SGML declaration also specifies the syntax-reference character
+ set of the document, and a few other parameters that bind the
+ abstract syntax of SGML to a concrete syntax. This concrete syntax
+ determines how the sequence of characters of the document is mapped
+ to a sequence of terminals in the grammar of the prologue.
+
+
+
+
+
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 10]</span>
+<a name="page-11" id="page-11" href="#page-11" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ For example, consider the following document:
+
+ &lt;!DOCTYPE html PUBLIC "-//IETF//DTD HTML 2.0//EN"&gt;
+ &lt;title&gt;Parsing Example&lt;/title&gt;
+ &lt;p&gt;Some text. &lt;em&gt;&amp;#42;wow&amp;#42;&lt;/em&gt;&lt;/p&gt;
+
+ An HTML user agent should use the SGML declaration that is given in
+ 9.5, "SGML Declaration for HTML". According to its document character
+ set, `&amp;#42;' refers to an asterisk character, `*'.
+
+ The instance above is regarded as the following sequence of
+ terminals:
+
+ 1. start-tag: TITLE
+
+ 2. data characters: "Parsing Example"
+
+ 3. end-tag: TITLE
+
+ 4. start-tag: P
+
+ 5. data characters: "Some text. "
+
+ 6. start-tag: EM
+
+ 7. data characters: "*wow*"
+
+ 8. end-tag: EM
+
+ 9. end-tag: P
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 11]</span>
+<a name="page-12" id="page-12" href="#page-12" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ The start symbol of the DTD grammar is HTML, and the productions are
+ given in the public text identified by `-//IETF//DTD HTML 2.0//EN'
+ (9.1, "HTML DTD"). The terminals above parse as:
+
+ HTML
+ |
+ \-HEAD
+ | |
+ | \-TITLE
+ | |
+ | \-&lt;TITLE&gt;
+ | |
+ | \-"Parsing Example"
+ | |
+ | \-&lt;/TITLE&gt;
+ |
+ \-BODY
+ |
+ \-P
+ |
+ \-&lt;P&gt;
+ |
+ \-"Some text. "
+ |
+ \-EM
+ | |
+ | \-&lt;EM&gt;
+ | |
+ | \-"*wow*"
+ | |
+ | \-&lt;/EM&gt;
+ |
+ \-&lt;/P&gt;
+
+ Some of the elements are delimited explicitly by tags, while the
+ boundaries of others are inferred. The &lt;HTML&gt; element contains a
+ &lt;HEAD&gt; element and a &lt;BODY&gt; element. The &lt;HEAD&gt; contains &lt;TITLE&gt;,
+ which is explicitly delimited by start- and end-tags.
+
+<span class="h3"><a name="section-3.2">3.2</a>. HTML Lexical Syntax</span>
+
+ SGML specifies an abstract syntax and a reference concrete syntax.
+ Aside from certain quantities and capacities (e.g. the limit on the
+ length of a name), all HTML documents use the reference concrete
+ syntax. In particular, all markup characters are in the repertoire of
+ [<a href="#ref-ISO-646">ISO-646</a>]. Data characters are drawn from the document character set
+ (see 6, "Characters, Words, and Paragraphs").
+
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 12]</span>
+<a name="page-13" id="page-13" href="#page-13" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ A complete discussion of SGML parsing, e.g. the mapping of a sequence
+ of characters to a sequence of tags and data, is left to the SGML
+ standard[<a href="#ref-SGML">SGML</a>]. This section is only a summary.
+
+<span class="h4"><a name="section-3.2.1">3.2.1</a>. Data Characters</span>
+
+ Any sequence of characters that do not constitute markup (see 9.6
+ "Delimiter Recognition" of [<a href="#ref-SGML">SGML</a>]) are mapped directly to strings of
+ data characters. Some markup also maps to data character strings.
+ Numeric character references map to single-character strings, via the
+ document character set. Each reference to one of the general entities
+ defined in the HTML DTD maps to a single-character string.
+
+ For example,
+
+ abc&amp;lt;def =&gt; "abc","&lt;","def"
+ abc&amp;#60;def =&gt; "abc","&lt;","def"
+
+ The terminating semicolon on entity or numeric character references
+ is only necessary when the character following the reference would
+ otherwise be recognized as part of the name (see 9.4.5 "Reference
+ End" in [<a href="#ref-SGML">SGML</a>]).
+
+ abc &amp;lt def =&gt; "abc ","&lt;"," def"
+ abc &amp;#60 def =&gt; "abc ","&lt;"," def"
+
+ An ampersand is only recognized as markup when it is followed by a
+ letter or a `#' and a digit:
+
+ abc &amp; lt def =&gt; "abc &amp; lt def"
+ abc &amp;# 60 def =&gt; "abc &amp;# 60 def"
+
+ A useful technique for translating plain text to HTML is to replace
+ each '&lt;', '&amp;', and '&gt;' by an entity reference or numeric character
+ reference as follows:
+
+ ENTITY NUMERIC
+ CHARACTER REFERENCE CHAR REF CHARACTER DESCRIPTION
+ --------- ---------- ----------- ---------------------
+ &amp; &amp;amp; &amp;#38; Ampersand
+ &lt; &amp;lt; &amp;#60; Less than
+ &gt; &amp;gt; &amp;#62; Greater than
+
+ NOTE - There are SGML mechanisms, CDATA and RCDATA
+ declared content, that allow most `&lt;', `&gt;', and `&amp;'
+ characters to be entered without the use of entity
+ references. Because these mechanisms tend to be used and
+ implemented inconsistently, and because they conflict
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 13]</span>
+<a name="page-14" id="page-14" href="#page-14" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ with techniques for reducing HTML to 7 bit ASCII for
+ transport, they are deprecated in this version of HTML.
+ See 5.5.2.1, "Example and Listing: XMP, LISTING".
+
+<span class="h4"><a name="section-3.2.2">3.2.2</a>. Tags</span>
+
+ Tags delimit elements such as headings, paragraphs, lists, character
+ highlighting, and links. Most HTML elements are identified in a
+ document as a start-tag, which gives the element name and attributes,
+ followed by the content, followed by the end tag. Start-tags are
+ delimited by `&lt;' and `&gt;'; end tags are delimited by `&lt;/' and `&gt;'. An
+ example is:
+
+ &lt;H1&gt;This is a Heading&lt;/H1&gt;
+
+ Some elements only have a start-tag without an end-tag. For example,
+ to create a line break, use the `&lt;BR&gt;' tag. Additionally, the end
+ tags of some other elements, such as Paragraph (`&lt;/P&gt;'), List Item
+ (`&lt;/LI&gt;'), Definition Term (`&lt;/DT&gt;'), and Definition Description
+ (`&lt;/DD&gt;') elements, may be omitted.
+
+ The content of an element is a sequence of data character strings and
+ nested elements. Some elements, such as anchors, cannot be nested.
+ Anchors and character highlighting may be put inside other
+ constructs. See the HTML DTD, 9.1, "HTML DTD" for full details.
+
+ NOTE - The SGML declaration for HTML specifies SHORTTAG YES, which
+ means that there are other valid syntaxes for tags, such as NET
+ tags, `&lt;EM/.../'; empty start tags, `&lt;&gt;'; and empty end-tags,
+ `&lt;/&gt;'. Until support for these idioms is widely deployed, their
+ use is strongly discouraged.
+
+<span class="h4"><a name="section-3.2.3">3.2.3</a>. Names</span>
+
+ A name consists of a letter followed by letters, digits, periods, or
+ hyphens. The length of a name is limited to 72 characters by the
+ `NAMELEN' parameter in the SGML declaration for HTML, 9.5, "SGML
+ Declaration for HTML". Element and attribute names are not case
+ sensitive, but entity names are. For example, `&lt;BLOCKQUOTE&gt;',
+ `&lt;BlockQuote&gt;', and `&lt;blockquote&gt;' are equivalent, whereas `&amp;amp;' is
+ different from `&amp;AMP;'.
+
+ In a start-tag, the element name must immediately follow the tag open
+ delimiter `&lt;'.
+
+
+
+
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 14]</span>
+<a name="page-15" id="page-15" href="#page-15" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+<span class="h4"><a name="section-3.2.4">3.2.4</a>. Attributes</span>
+
+ In a start-tag, white space and attributes are allowed between the
+ element name and the closing delimiter. An attribute specification
+ typically consists of an attribute name, an equal sign, and a value,
+ though some attribute specifications may be just a name token. White
+ space is allowed around the equal sign.
+
+ The value of the attribute may be either:
+
+ * A string literal, delimited by single quotes or double
+ quotes and not containing any occurrences of the delimiting
+ character.
+
+ NOTE - Some historical implementations consider any
+ occurrence of the `&gt;' character to signal the end of
+ a tag. For compatibility with such implementations,
+ when `&gt;' appears in an attribute value, it should be
+ represented with a numeric character reference. For
+ example, `&lt;IMG SRC="eq1.jpg" alt="a&gt;b"&gt;' should be
+ written `&lt;IMG SRC="eq1.jpg" alt="a&amp;#62;b"&gt;' or `&lt;IMG
+ SRC="eq1.jpg" alt="a&amp;gt;b"&gt;'.
+
+ * A name token (a sequence of letters, digits, periods, or
+ hyphens). Name tokens are not case sensitive.
+
+ NOTE - Some historical implementations allow any
+ character except space or `&gt;' in a name token.
+
+ In this example, &lt;img&gt; is the element name, src is the attribute
+ name, and `http://host/dir/file.gif' is the attribute value:
+
+ &lt;img src='http://host/dir/file.gif'&gt;
+
+ A useful technique for computing an attribute value literal for a
+ given string is to replace each quote and white space character by an
+ entity reference or numeric character reference as follows:
+
+ ENTITY NUMERIC
+ CHARACTER REFERENCE CHAR REF CHARACTER DESCRIPTION
+ --------- ---------- ----------- ---------------------
+ HT &amp;#9; Tab
+ LF &amp;#10; Line Feed
+ CR &amp;#13; Carriage Return
+ SP &amp;#32; Space
+ " &amp;quot; &amp;#34; Quotation mark
+ &amp; &amp;amp; &amp;#38; Ampersand
+
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 15]</span>
+<a name="page-16" id="page-16" href="#page-16" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ For example:
+
+ &lt;IMG SRC="image.jpg" alt="First &amp;quot;real&amp;quot; example"&gt;
+
+ The `LITLEN' parameter in the SGML declaration (9.5, "SGML
+ Declaration for HTML") limits the length of an attribute value to
+ 1024 characters.
+
+ Attributes such as ISMAP and COMPACT may be written using a minimized
+ syntax (see 7.9.1.2 "Omitted Attribute Name" in [<a href="#ref-SGML">SGML</a>]). The markup:
+
+ &lt;UL COMPACT="compact"&gt;
+
+ can be written using a minimized syntax:
+
+ &lt;UL COMPACT&gt;
+
+ NOTE - Some historical implementations only understand the minimized
+ syntax.
+
+<span class="h4"><a name="section-3.2.5">3.2.5</a>. Comments</span>
+
+ To include comments in an HTML document, use a comment declaration. A
+ comment declaration consists of `&lt;!' followed by zero or more
+ comments followed by `&gt;'. Each comment starts with `--' and includes
+ all text up to and including the next occurrence of `--'. In a
+ comment declaration, white space is allowed after each comment, but
+ not before the first comment. The entire comment declaration is
+ ignored.
+
+ NOTE - Some historical HTML implementations incorrectly consider
+ any `&gt;' character to be the termination of a comment.
+
+ For example:
+
+ &lt;!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN"&gt;
+ &lt;HEAD&gt;
+ &lt;TITLE&gt;HTML Comment Example&lt;/TITLE&gt;
+ &lt;!-- Id: html-sgml.sgm,v 1.5 1995/05/26 21:29:50 connolly Exp --&gt;
+ &lt;!-- another -- -- comment --&gt;
+ &lt;!&gt;
+ &lt;/HEAD&gt;
+ &lt;BODY&gt;
+ &lt;p&gt; &lt;!- not a comment, just regular old data characters -&gt;
+
+
+
+
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 16]</span>
+<a name="page-17" id="page-17" href="#page-17" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+<span class="h3"><a name="section-3.3">3.3</a>. HTML Public Text Identifiers</span>
+
+ To identify information as an HTML document conforming to this
+ specification, each document must start with one of the following
+ document type declarations.
+
+ &lt;!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN"&gt;
+
+ This document type declaration refers to the HTML DTD in 9.1, "HTML
+ DTD".
+
+ NOTE - If the body of a `text/html' message entity does not begin
+ with a document type declaration, an HTML user agent should infer
+ the above document type declaration.
+
+ &lt;!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0 Level 2//EN"&gt;
+
+ This document type declaration also refers to the HTML DTD which
+ appears in 9.1, "HTML DTD".
+
+ &lt;!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0 Level 1//EN"&gt;
+
+ This document type declaration refers to the level 1 HTML DTD in 9.3,
+ "Level 1 HTML DTD". Form elements must not occur in level 1
+ documents.
+
+ &lt;!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0 Strict//EN"&gt;
+ &lt;!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0 Strict Level 1//EN"&gt;
+
+ These two document type declarations refer to the HTML DTDs in 9.2,
+ "Strict HTML DTD" and 9.4, "Strict Level 1 HTML DTD". They refer to
+ the more structurally rigid definition of HTML.
+
+ HTML user agents may support other document types. In particular,
+ they may support other formal public identifiers, or other document
+ types altogether. They may support an internal declaration subset
+ with supplemental entity, element, and other markup declarations.
+
+<span class="h3"><a name="section-3.4">3.4</a>. Example HTML Document</span>
+
+ &lt;!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN"&gt;
+ &lt;HTML&gt;
+ &lt;!-- Here's a good place to put a comment. --&gt;
+ &lt;HEAD&gt;
+ &lt;TITLE&gt;Structural Example&lt;/TITLE&gt;
+ &lt;/HEAD&gt;&lt;BODY&gt;
+ &lt;H1&gt;First Header&lt;/H1&gt;
+ &lt;P&gt;This is a paragraph in the example HTML file. Keep in mind
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 17]</span>
+<a name="page-18" id="page-18" href="#page-18" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ that the title does not appear in the document text, but that
+ the header (defined by H1) does.&lt;/P&gt;
+ &lt;OL&gt;
+ &lt;LI&gt;First item in an ordered list.
+ &lt;LI&gt;Second item in an ordered list.
+ &lt;UL COMPACT&gt;
+ &lt;LI&gt; Note that lists can be nested;
+ &lt;LI&gt; Whitespace may be used to assist in reading the
+ HTML source.
+ &lt;/UL&gt;
+ &lt;LI&gt;Third item in an ordered list.
+ &lt;/OL&gt;
+ &lt;P&gt;This is an additional paragraph. Technically, end tags are
+ not required for paragraphs, although they are allowed. You can
+ include character highlighting in a paragraph. &lt;EM&gt;This sentence
+ of the paragraph is emphasized.&lt;/EM&gt; Note that the &amp;lt;/P&amp;gt;
+ end tag has been omitted.
+ &lt;P&gt;
+ &lt;IMG SRC ="triangle.xbm" alt="Warning: "&gt;
+ Be sure to read these &lt;b&gt;bold instructions&lt;/b&gt;.
+ &lt;/BODY&gt;&lt;/HTML&gt;
+
+<span class="h2"><a name="section-4">4</a>. HTML as an Internet Media Type</span>
+
+ An HTML user agent allows users to interact with resources which have
+ HTML representations. At a minimum, it must allow users to examine
+ and navigate the content of HTML level 1 documents. HTML user agents
+ should be able to preserve all formatting distinctions represented in
+ an HTML document, and be able to simultaneously present resources
+ referred to by IMG elements (they may ignore some formatting
+ distinctions or IMG resources at the request of the user). Level 2
+ HTML user agents should support form entry and submission.
+
+<span class="h3"><a name="section-4.1">4.1</a>. text/html media type</span>
+
+ This specification defines the Internet Media Type [<a href="#ref-IMEDIA" title='"Media Type Registration Procedure"'>IMEDIA</a>] (formerly
+ referred to as the Content Type [<a href="#ref-MIME" title='"MIME (Multipurpose Internet Mail Extensions) Part One: Mechanisms for Specifying and Describing the Format of Internet Message Bodies"'>MIME</a>]) called `text/html'. The
+ following is to be registered with [<a href="#ref-IANA" title='"Assigned Numbers"'>IANA</a>].
+
+ Media Type name
+ text
+
+ Media subtype name
+ html
+
+ Required parameters
+ none
+
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 18]</span>
+<a name="page-19" id="page-19" href="#page-19" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ Optional parameters
+ level, charset
+
+ Encoding considerations
+ any encoding is allowed
+
+ Security considerations
+ see 10, "Security Considerations"
+
+ The optional parameters are defined as follows:
+
+ Level
+ The level parameter specifies the feature set used in
+ the document. The level is an integer number, implying
+ that any features of same or lower level may be present
+ in the document. Level 1 is all features defined in this
+ specification except those that require the &lt;FORM&gt;
+ element. Level 2 includes form processing. Level 2 is
+ the default.
+
+ Charset
+ The charset parameter (as defined in <a href="./rfc1521#section-7.1.1">section&nbsp;7.1.1 of
+ RFC 1521</a> [<a href="#ref-MIME" title='"MIME (Multipurpose Internet Mail Extensions) Part One: Mechanisms for Specifying and Describing the Format of Internet Message Bodies"'>MIME</a>]) may be given to specify the character
+ encoding scheme used to represent the HTML document as a
+ sequence of octets. The default value is outside the
+ scope of this specification; but for example, the
+ default is `US-ASCII' in the context of MIME mail, and
+ `ISO-8859-1' in the context of HTTP [<a href="#ref-HTTP" title='"Hypertext Transfer Protocol - HTTP/1.0"'>HTTP</a>].
+
+<span class="h3"><a name="section-4.2">4.2</a>. HTML Document Representation</span>
+
+ A message entity with a content type of `text/html' represents an
+ HTML document, consisting of a single text entity. The `charset'
+ parameter (whether implicit or explicit) identifies a character
+ encoding scheme. The text entity consists of the characters
+ determined by this character encoding scheme and the octets of the
+ body of the message entity.
+
+<span class="h4"><a name="section-4.2.1">4.2.1</a>. Undeclared Markup Error Handling</span>
+
+ To facilitate experimentation and interoperability between
+ implementations of various versions of HTML, the installed base of
+ HTML user agents supports a superset of the HTML 2.0 language by
+ reducing it to HTML 2.0: markup in the form of a start-tag or end-
+ tag, whose generic identifier is not declared is mapped to nothing
+ during tokenization. Undeclared attributes are treated similarly. The
+ entire attribute specification of an unknown attribute (i.e., the
+ unknown attribute and its value, if any) should be ignored. On the
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 19]</span>
+<a name="page-20" id="page-20" href="#page-20" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ other hand, references to undeclared entities should be treated as
+ data characters.
+
+ For example:
+
+ &lt;div class=chapter&gt;&lt;h1&gt;foo&lt;/h1&gt;&lt;p&gt;...&lt;/div&gt;
+ =&gt; &lt;H1&gt;,"foo",&lt;/H1&gt;,&lt;P&gt;,"..."
+ xxx &lt;P ID=z23&gt; yyy
+ =&gt; "xxx ",&lt;P&gt;," yyy"
+ Let &amp;alpha; &amp;amp; &amp;beta; be finite sets.
+ =&gt; "Let &amp;alpha; &amp; &amp;beta; be finite sets."
+
+ Support for notifying the user of such errors is encouraged.
+
+ Information providers are warned that this convention is not binding:
+ unspecified behavior may result, as such markup does not conform to
+ this specification.
+
+<span class="h4"><a name="section-4.2.2">4.2.2</a>. Conventional Representation of Newlines</span>
+
+ SGML specifies that a text entity is a sequence of records, each
+ beginning with a record start character and ending with a record end
+ character (code positions 10 and 13 respectively) (section 7.6.1,
+ "Record Boundaries" in [<a href="#ref-SGML">SGML</a>]).
+
+ [<a href="#ref-MIME">MIME</a>] specifies that a body of type `text/*' is a sequence of lines,
+ each terminated by CRLF, that is, octets 13, 10.
+
+ In practice, HTML documents are frequently represented and
+ transmitted using an end of line convention that depends on the
+ conventions of the source of the document; frequently, that
+ representation consists of CR only, LF only, or a CR LF sequence.
+ Hence the decoding of the octets will often result in a text entity
+ with some missing record start and record end characters.
+
+ Since there is no ambiguity, HTML user agents are encouraged to infer
+ the missing record start and end characters.
+
+ An HTML user agent should treat end of line in any of its variations
+ as a word space in all contexts except preformatted text. Within
+ preformatted text, an HTML user agent should treat any of the three
+ common representations of end-of-line as starting a new line.
+
+<span class="h2"><a name="section-5">5</a>. Document Structure</span>
+
+ An HTML document is a tree of elements, including a head and body,
+ headings, paragraphs, lists, etc. Form elements are discussed in 8,
+ "Forms".
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 20]</span>
+<a name="page-21" id="page-21" href="#page-21" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+<span class="h3"><a name="section-5.1">5.1</a>. Document Element: HTML</span>
+
+ The HTML document element consists of a head and a body, much like a
+ memo or a mail message. The head contains the title and optional
+ elements. The body is a text flow consisting of paragraphs, lists,
+ and other elements.
+
+<span class="h3"><a name="section-5.2">5.2</a>. Head: HEAD</span>
+
+ The head of an HTML document is an unordered collection of
+ information about the document. For example:
+
+ &lt;!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN"&gt;
+ &lt;HEAD&gt;
+ &lt;TITLE&gt;Introduction to HTML&lt;/TITLE&gt;
+ &lt;/HEAD&gt;
+ ...
+
+<span class="h4"><a name="section-5.2.1">5.2.1</a>. Title: TITLE</span>
+
+ Every HTML document must contain a &lt;TITLE&gt; element.
+
+ The title should identify the contents of the document in a global
+ context. A short title, such as "Introduction" may be meaningless out
+ of context. A title such as "Introduction to HTML Elements" is more
+ appropriate.
+
+ NOTE - The length of a title is not limited; however, long titles
+ may be truncated in some applications. To minimize this
+ possibility, titles should be fewer than 64 characters.
+
+ A user agent may display the title of a document in a history list or
+ as a label for the window displaying the document. This differs from
+ headings (5.4, "Headings: H1 ... H6"), which are typically displayed
+ within the body text flow.
+
+<span class="h4"><a name="section-5.2.2">5.2.2</a>. Base Address: BASE</span>
+
+ The optional &lt;BASE&gt; element provides a base address for interpreting
+ relative URLs when the document is read out of context (see 7,
+ "Hyperlinks"). The value of the HREF attribute must be an absolute
+ URI.
+
+<span class="h4"><a name="section-5.2.3">5.2.3</a>. Keyword Index: ISINDEX</span>
+
+ The &lt;ISINDEX&gt; element indicates that the user agent should allow the
+ user to search an index by giving keywords. See 7.5, "Queries and
+ Indexes" for details.
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 21]</span>
+<a name="page-22" id="page-22" href="#page-22" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+<span class="h4"><a name="section-5.2.4">5.2.4</a>. Link: LINK</span>
+
+ The &lt;LINK&gt; element represents a hyperlink (see 7, "Hyperlinks"). Any
+ number of LINK elements may occur in the &lt;HEAD&gt; element of an HTML
+ document. It has the same attributes as the &lt;A&gt; element (see 5.7.3,
+ "Anchor: A").
+
+ The &lt;LINK&gt; element is typically used to indicate authorship, related
+ indexes and glossaries, older or more recent versions, document
+ hierarchy, associated resources such as style sheets, etc.
+
+<span class="h4"><a name="section-5.2.5">5.2.5</a>. Associated Meta-information: META</span>
+
+ The &lt;META&gt; element is an extensible container for use in identifying
+ specialized document meta-information. Meta-information has two main
+ functions:
+
+ * to provide a means to discover that the data set exists
+ and how it might be obtained or accessed; and
+
+ * to document the content, quality, and features of a data
+ set, indicating its fitness for use.
+
+ Each &lt;META&gt; element specifies a name/value pair. If multiple META
+ elements are provided with the same name, their combined contents--
+ concatenated as a comma-separated list--is the value associated with
+ that name.
+
+ NOTE - The &lt;META&gt; element should not be used where a
+ specific element, such as &lt;TITLE&gt;, would be more
+ appropriate. Rather than a &lt;META&gt; element with a URI as
+ the value of the CONTENT attribute, use a &lt;LINK&gt;
+ element.
+
+ HTTP servers may read the content of the document &lt;HEAD&gt; to generate
+ header fields corresponding to any elements defining a value for the
+ attribute HTTP-EQUIV.
+
+ NOTE - The method by which the server extracts document
+ meta-information is unspecified and not mandatory. The
+ &lt;META&gt; element only provides an extensible mechanism for
+ identifying and embedding document meta-information --
+ how it may be used is up to the individual server
+ implementation and the HTML user agent.
+
+
+
+
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 22]</span>
+<a name="page-23" id="page-23" href="#page-23" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ Attributes of the META element:
+
+ HTTP-EQUIV
+ binds the element to an HTTP header field. An HTTP
+ server may use this information to process the document.
+ In particular, it may include a header field in the
+ responses to requests for this document: the header name
+ is taken from the HTTP-EQUIV attribute value, and the
+ header value is taken from the value of the CONTENT
+ attribute. HTTP header names are not case sensitive.
+
+ NAME
+ specifies the name of the name/value pair. If not
+ present, HTTP-EQUIV gives the name.
+
+ CONTENT
+ specifies the value of the name/value pair.
+
+ Examples
+
+ If the document contains:
+
+ &lt;META HTTP-EQUIV="Expires"
+ CONTENT="Tue, 04 Dec 1993 21:29:02 GMT"&gt;
+ &lt;meta http-equiv="Keywords" CONTENT="Fred"&gt;
+ &lt;META HTTP-EQUIV="Reply-to"
+ content="fielding@ics.uci.edu (Roy Fielding)"&gt;
+ &lt;Meta Http-equiv="Keywords" CONTENT="Barney"&gt;
+
+ then the server may include the following header fields:
+
+ Expires: Tue, 04 Dec 1993 21:29:02 GMT
+ Keywords: Fred, Barney
+ Reply-to: fielding@ics.uci.edu (Roy Fielding)
+
+ as part of the HTTP response to a `GET' or `HEAD' request for
+ that document.
+
+ An HTTP server must not use the &lt;META&gt; element to form an HTTP
+ response header unless the HTTP-EQUIV attribute is present.
+
+ An HTTP server may disregard any &lt;META&gt; elements that specify
+ information controlled by the HTTP server, for example `Server',
+ `Date', and `Last-modified'.
+
+
+
+
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 23]</span>
+<a name="page-24" id="page-24" href="#page-24" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+<span class="h4"><a name="section-5.2.6">5.2.6</a>. Next Id: NEXTID</span>
+
+ The &lt;NEXTID&gt; element is included for historical reasons only. HTML
+ documents should not contain &lt;NEXTID&gt; elements.
+
+ The &lt;NEXTID&gt; element gives a hint for the name to use for a new &lt;A&gt;
+ element when editing an HTML document. It should be distinct from all
+ NAME attribute values on &lt;A&gt; elements. For example:
+
+ &lt;NEXTID N=Z27&gt;
+
+<span class="h3"><a name="section-5.3">5.3</a>. Body: BODY</span>
+
+ The &lt;BODY&gt; element contains the text flow of the document, including
+ headings, paragraphs, lists, etc.
+
+ For example:
+
+ &lt;BODY&gt;
+ &lt;h1&gt;Important Stuff&lt;/h1&gt;
+ &lt;p&gt;Explanation about important stuff...
+ &lt;/BODY&gt;
+
+<span class="h3"><a name="section-5.4">5.4</a>. Headings: H1 ... H6</span>
+
+ The six heading elements, &lt;H1&gt; through &lt;H6&gt;, denote section headings.
+ Although the order and occurrence of headings is not constrained by
+ the HTML DTD, documents should not skip levels (for example, from H1
+ to H3), as converting such documents to other representations is
+ often problematic.
+
+ Example of use:
+
+ &lt;H1&gt;This is a heading&lt;/H1&gt;
+ Here is some text
+ &lt;H2&gt;Second level heading&lt;/H2&gt;
+ Here is some more text.
+
+ Typical renderings are:
+
+ H1
+ Bold, very-large font, centered. One or two blank lines
+ above and below.
+
+ H2
+ Bold, large font, flush-left. One or two blank lines
+ above and below.
+
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 24]</span>
+<a name="page-25" id="page-25" href="#page-25" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ H3
+ Italic, large font, slightly indented from the left
+ margin. One or two blank lines above and below.
+
+ H4
+ Bold, normal font, indented more than H3. One blank line
+ above and below.
+
+ H5
+ Italic, normal font, indented as H4. One blank line
+ above.
+
+ H6
+ Bold, indented same as normal text, more than H5. One
+ blank line above.
+
+<span class="h3"><a name="section-5.5">5.5</a>. Block Structuring Elements</span>
+
+ Block structuring elements include paragraphs, lists, and block
+ quotes. They must not contain heading elements, but they may contain
+ phrase markup, and in some cases, they may be nested.
+
+<span class="h4"><a name="section-5.5.1">5.5.1</a>. Paragraph: P</span>
+
+ The &lt;P&gt; element indicates a paragraph. The exact indentation, leading
+ space, etc. of a paragraph is not specified and may be a function of
+ other tags, style sheets, etc.
+
+ Typically, paragraphs are surrounded by a vertical space of one line
+ or half a line. The first line in a paragraph is indented in some
+ cases.
+
+ Example of use:
+
+ &lt;H1&gt;This Heading Precedes the Paragraph&lt;/H1&gt;
+ &lt;P&gt;This is the text of the first paragraph.
+ &lt;P&gt;This is the text of the second paragraph. Although you do not
+ need to start paragraphs on new lines, maintaining this
+ convention facilitates document maintenance.&lt;/P&gt;
+ &lt;P&gt;This is the text of a third paragraph.&lt;/P&gt;
+
+<span class="h4"><a name="section-5.5.2">5.5.2</a>. Preformatted Text: PRE</span>
+
+ The &lt;PRE&gt; element represents a character cell block of text and is
+ suitable for text that has been formatted for a monospaced font.
+
+ The &lt;PRE&gt; tag may be used with the optional WIDTH attribute. The
+ WIDTH attribute specifies the maximum number of characters for a line
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 25]</span>
+<a name="page-26" id="page-26" href="#page-26" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ and allows the HTML user agent to select a suitable font and
+ indentation.
+
+ Within preformatted text:
+
+ * Line breaks within the text are rendered as a move to the
+ beginning of the next line.
+
+ NOTE - References to the "beginning of a new line"
+ do not imply that the renderer is forbidden from
+ using a constant left indent for rendering
+ preformatted text. The left indent may be
+ constrained by the width required.
+
+ * Anchor elements and phrase markup may be used.
+
+ NOTE - Constraints on the processing of &lt;PRE&gt;
+ content may limit or prevent the ability of the HTML
+ user agent to faithfully render phrase markup.
+
+ * Elements that define paragraph formatting (headings,
+ address, etc.) must not be used.
+
+ NOTE - Some historical documents contain &lt;P&gt; tags in
+ &lt;PRE&gt; elements. User agents are encouraged to treat
+ this as a line break. A &lt;P&gt; tag followed by a
+ newline character should produce only one line
+ break, not a line break plus a blank line.
+
+ * The horizontal tab character (code position 9 in the HTML
+ document character set) must be interpreted as the smallest
+ positive nonzero number of spaces which will leave the
+ number of characters so far on the line as a multiple of 8.
+ Documents should not contain tab characters, as they are not
+ supported consistently.
+
+ Example of use:
+
+ &lt;PRE&gt;
+ Line 1.
+ Line 2 is to the right of line 1. &lt;a href="abc"&gt;abc&lt;/a&gt;
+ Line 3 aligns with line 2. &lt;a href="def"&gt;def&lt;/a&gt;
+ &lt;/PRE&gt;
+
+
+
+
+
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 26]</span>
+<a name="page-27" id="page-27" href="#page-27" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+<span class="h5"><a name="section-5.5.2.1">5.5.2.1</a>. Example and Listing: XMP, LISTING</span>
+
+ The &lt;XMP&gt; and &lt;LISTING&gt; elements are similar to the &lt;PRE&gt; element,
+ but they have a different syntax. Their content is declared as CDATA,
+ which means that no markup except the end-tag open delimiter-in-
+ context is recognized (see 9.6 "Delimiter Recognition" of [<a href="#ref-SGML">SGML</a>]).
+
+ NOTE - In a previous draft of the HTML specification, the syntax
+ of &lt;XMP&gt; and &lt;LISTING&gt; elements allowed closing tags to be treated
+ as data characters, as long as the tag name was not &lt;XMP&gt; or
+ &lt;LISTING&gt;, respectively.
+
+ Since CDATA declared content has a number of unfortunate interactions
+ with processing techniques and tends to be used and implemented
+ inconsistently, HTML documents should not contain &lt;XMP&gt; nor &lt;LISTING&gt;
+ elements -- the &lt;PRE&gt; tag is more expressive and more consistently
+ supported.
+
+ The &lt;LISTING&gt; element should be rendered so that at least 132
+ characters fit on a line. The &lt;XMP&gt; element should be rendered so
+ that at least 80 characters fit on a line but is otherwise identical
+ to the &lt;LISTING&gt; element.
+
+ NOTE - In a previous draft, HTML included a &lt;PLAINTEXT&gt; element
+ that is similar to the &lt;LISTING&gt; element, except that there is no
+ closing tag: all characters after the &lt;PLAINTEXT&gt; start-tag are
+ data.
+
+<span class="h4"><a name="section-5.5.3">5.5.3</a>. Address: ADDRESS</span>
+
+ The &lt;ADDRESS&gt; element contains such information as address, signature
+ and authorship, often at the beginning or end of the body of a
+ document.
+
+ Typically, the &lt;ADDRESS&gt; element is rendered in an italic typeface
+ and may be indented.
+
+ Example of use:
+
+ &lt;ADDRESS&gt;
+ Newsletter editor&lt;BR&gt;
+ J.R. Brown&lt;BR&gt;
+ JimquickPost News, Jimquick, CT 01234&lt;BR&gt;
+ Tel (123) 456 7890
+ &lt;/ADDRESS&gt;
+
+
+
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 27]</span>
+<a name="page-28" id="page-28" href="#page-28" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+<span class="h4"><a name="section-5.5.4">5.5.4</a>. Block Quote: BLOCKQUOTE</span>
+
+ The &lt;BLOCKQUOTE&gt; element contains text quoted from another source.
+
+ A typical rendering might be a slight extra left and right indent,
+ and/or italic font. The &lt;BLOCKQUOTE&gt; typically provides space above
+ and below the quote.
+
+ Single-font rendition may reflect the quotation style of Internet
+ mail by putting a vertical line of graphic characters, such as the
+ greater than symbol (&gt;), in the left margin.
+
+ Example of use:
+
+ I think the play ends
+ &lt;BLOCKQUOTE&gt;
+ &lt;P&gt;Soft you now, the fair Ophelia. Nymph, in thy orisons, be all
+ my sins remembered.
+ &lt;/BLOCKQUOTE&gt;
+ but I am not sure.
+
+<span class="h3"><a name="section-5.6">5.6</a>. List Elements</span>
+
+ HTML includes a number of list elements. They may be used in
+ combination; for example, an &lt;OL&gt; may be nested in an &lt;LI&gt; element of
+ a &lt;UL&gt;.
+
+ The COMPACT attribute suggests that a compact rendering be used.
+
+<span class="h4"><a name="section-5.6.1">5.6.1</a>. Unordered List: UL, LI</span>
+
+ The &lt;UL&gt; represents a list of items -- typically rendered as a
+ bulleted list.
+
+ The content of a &lt;UL&gt; element is a sequence of &lt;LI&gt; elements. For
+ example:
+
+ &lt;UL&gt;
+ &lt;LI&gt;First list item
+ &lt;LI&gt;Second list item
+ &lt;p&gt;second paragraph of second item
+ &lt;LI&gt;Third list item
+ &lt;/UL&gt;
+
+<span class="h4"><a name="section-5.6.2">5.6.2</a>. Ordered List: OL</span>
+
+ The &lt;OL&gt; element represents an ordered list of items, sorted by
+ sequence or order of importance. It is typically rendered as a
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 28]</span>
+<a name="page-29" id="page-29" href="#page-29" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ numbered list.
+
+ The content of an &lt;OL&gt; element is a sequence of &lt;LI&gt; elements. For
+ example:
+
+ &lt;OL&gt;
+ &lt;LI&gt;Click the Web button to open URI window.
+ &lt;LI&gt;Enter the URI number in the text field of the Open URI
+ window. The Web document you specified is displayed.
+ &lt;ol&gt;
+ &lt;li&gt;substep 1
+ &lt;li&gt;substep 2
+ &lt;/ol&gt;
+ &lt;LI&gt;Click highlighted text to move from one link to another.
+ &lt;/OL&gt;
+
+<span class="h4"><a name="section-5.6.3">5.6.3</a>. Directory List: DIR</span>
+
+ The &lt;DIR&gt; element is similar to the &lt;UL&gt; element. It represents a
+ list of short items, typically up to 20 characters each. Items in a
+ directory list may be arranged in columns, typically 24 characters
+ wide.
+
+ The content of a &lt;DIR&gt; element is a sequence of &lt;LI&gt; elements.
+ Nested block elements are not allowed in the content of &lt;DIR&gt;
+ elements. For example:
+
+ &lt;DIR&gt;
+ &lt;LI&gt;A-H&lt;LI&gt;I-M
+ &lt;LI&gt;M-R&lt;LI&gt;S-Z
+ &lt;/DIR&gt;
+
+<span class="h4"><a name="section-5.6.4">5.6.4</a>. Menu List: MENU</span>
+
+ The &lt;MENU&gt; element is a list of items with typically one line per
+ item. The menu list style is typically more compact than the style of
+ an unordered list.
+
+ The content of a &lt;MENU&gt; element is a sequence of &lt;LI&gt; elements.
+ Nested block elements are not allowed in the content of &lt;MENU&gt;
+ elements. For example:
+
+ &lt;MENU&gt;
+ &lt;LI&gt;First item in the list.
+ &lt;LI&gt;Second item in the list.
+ &lt;LI&gt;Third item in the list.
+ &lt;/MENU&gt;
+
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 29]</span>
+<a name="page-30" id="page-30" href="#page-30" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+<span class="h4"><a name="section-5.6.5">5.6.5</a>. Definition List: DL, DT, DD</span>
+
+ A definition list is a list of terms and corresponding definitions.
+ Definition lists are typically formatted with the term flush-left and
+ the definition, formatted paragraph style, indented after the term.
+
+ The content of a &lt;DL&gt; element is a sequence of &lt;DT&gt; elements and/or
+ &lt;DD&gt; elements, usually in pairs. Multiple &lt;DT&gt; may be paired with a
+ single &lt;DD&gt; element. Documents should not contain multiple
+ consecutive &lt;DD&gt; elements.
+
+ Example of use:
+
+ &lt;DL&gt;
+ &lt;DT&gt;Term&lt;DD&gt;This is the definition of the first term.
+ &lt;DT&gt;Term&lt;DD&gt;This is the definition of the second term.
+ &lt;/DL&gt;
+
+ If the DT term does not fit in the DT column (typically one third of
+ the display area), it may be extended across the page with the DD
+ section moved to the next line, or it may be wrapped onto successive
+ lines of the left hand column.
+
+ The optional COMPACT attribute suggests that a compact rendering be
+ used, because the list items are small and/or the entire list is
+ large.
+
+ Unless the COMPACT attribute is present, an HTML user agent may leave
+ white space between successive DT, DD pairs. The COMPACT attribute
+ may also reduce the width of the left-hand (DT) column.
+
+ &lt;DL COMPACT&gt;
+ &lt;DT&gt;Term&lt;DD&gt;This is the first definition in compact format.
+ &lt;DT&gt;Term&lt;DD&gt;This is the second definition in compact format.
+ &lt;/DL&gt;
+
+<span class="h3"><a name="section-5.7">5.7</a>. Phrase Markup</span>
+
+ Phrases may be marked up according to idiomatic usage, typographic
+ appearance, or for use as hyperlink anchors.
+
+ User agents must render highlighted phrases distinctly from plain
+ text. Additionally, &lt;EM&gt; content must be rendered as distinct from
+ &lt;STRONG&gt; content, and &lt;B&gt; content must be rendered as distinct from &lt;I&gt;
+ content.
+
+ Phrase elements may be nested within the content of other phrase
+ elements; however, HTML user agents may render nested phrase elements
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 30]</span>
+<a name="page-31" id="page-31" href="#page-31" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ indistinctly from non-nested elements:
+
+ plain &lt;B&gt;bold &lt;I&gt;italic&lt;/I&gt;&lt;/B&gt; may be rendered
+ the same as plain &lt;B&gt;bold &lt;/B&gt;&lt;I&gt;italic&lt;/I&gt;
+
+<span class="h4"><a name="section-5.7.1">5.7.1</a>. Idiomatic Elements</span>
+
+ Phrases may be marked up to indicate certain idioms.
+
+ NOTE - User agents may support the &lt;DFN&gt; element, not included in
+ this specification, as it has been deployed to some extent. It is
+ used to indicate the defining instance of a term, and it is
+ typically rendered in italic or bold italic.
+
+<span class="h5"><a name="section-5.7.1.1">5.7.1.1</a>. Citation: CITE</span>
+
+ The &lt;CITE&gt; element is used to indicate the title of a book or
+ other citation. It is typically rendered as italics. For example:
+
+ He just couldn't get enough of &lt;cite&gt;The Grapes of Wrath&lt;/cite&gt;.
+
+<span class="h5"><a name="section-5.7.1.2">5.7.1.2</a>. Code: CODE</span>
+
+ The &lt;CODE&gt; element indicates an example of code, typically
+ rendered in a mono-spaced font. The &lt;CODE&gt; element is intended for
+ short words or phrases of code; the &lt;PRE&gt; block structuring
+ element (5.5.2, "Preformatted Text: PRE") is more appropriate
+ for multiple-line listings. For example:
+
+ The expression &lt;code&gt;x += 1&lt;/code&gt;
+ is short for &lt;code&gt;x = x + 1&lt;/code&gt;.
+
+<span class="h5"><a name="section-5.7.1.3">5.7.1.3</a>. Emphasis: EM</span>
+
+ The &lt;EM&gt; element indicates an emphasized phrase, typically
+ rendered as italics. For example:
+
+ A singular subject &lt;em&gt;always&lt;/em&gt; takes a singular verb.
+
+<span class="h5"><a name="section-5.7.1.4">5.7.1.4</a>. Keyboard: KBD</span>
+
+ The &lt;KBD&gt; element indicates text typed by a user, typically
+ rendered in a mono-spaced font. This is commonly used in
+ instruction manuals. For example:
+
+ Enter &lt;kbd&gt;FIND IT&lt;/kbd&gt; to search the database.
+
+
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 31]</span>
+<a name="page-32" id="page-32" href="#page-32" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+<span class="h5"><a name="section-5.7.1.5">5.7.1.5</a>. Sample: SAMP</span>
+
+ The &lt;SAMP&gt; element indicates a sequence of literal characters,
+ typically rendered in a mono-spaced font. For example:
+
+ The only word containing the letters &lt;samp&gt;mt&lt;/samp&gt; is dreamt.
+
+<span class="h5"><a name="section-5.7.1.6">5.7.1.6</a>. Strong Emphasis: STRONG</span>
+
+ The &lt;STRONG&gt; element indicates strong emphasis, typically rendered
+ in bold. For example:
+
+ &lt;strong&gt;STOP&lt;/strong&gt;, or I'll say "&lt;strong&gt;STOP&lt;/strong&gt;" again!
+
+<span class="h5"><a name="section-5.7.1.7">5.7.1.7</a>. Variable: VAR</span>
+
+ The &lt;VAR&gt; element indicates a placeholder variable, typically
+ rendered as italic. For example:
+
+ Type &lt;SAMP&gt;html-check &lt;VAR&gt;file&lt;/VAR&gt; | more&lt;/SAMP&gt;
+ to check &lt;VAR&gt;file&lt;/VAR&gt; for markup errors.
+
+<span class="h4"><a name="section-5.7.2">5.7.2</a>. Typographic Elements</span>
+
+ Typographic elements are used to specify the format of marked
+ text.
+
+ Typical renderings for idiomatic elements may vary between user
+ agents. If a specific rendering is necessary -- for example, when
+ referring to a specific text attribute as in "The italic parts are
+ mandatory" -- a typographic element can be used to ensure that the
+ intended typography is used where possible.
+
+ NOTE - User agents may support some typographic elements not
+ included in this specification, as they have been deployed to some
+ extent. The &lt;STRIKE&gt; element indicates a horizontal line through the
+ characters, and the &lt;U&gt; element indicates an underline.
+
+<span class="h5"><a name="section-5.7.2.1">5.7.2.1</a>. Bold: B</span>
+
+ The &lt;B&gt; element indicates bold text. Where bold typography is
+ unavailable, an alternative representation may be used.
+
+<span class="h5"><a name="section-5.7.2.2">5.7.2.2</a>. Italic: I</span>
+
+ The &lt;I&gt; element indicates italic text. Where italic typography is
+ unavailable, an alternative representation may be used.
+
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 32]</span>
+<a name="page-33" id="page-33" href="#page-33" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+<span class="h5"><a name="section-5.7.2.3">5.7.2.3</a>. Teletype: TT</span>
+
+ The &lt;TT&gt; element indicates teletype (monospaced) text. Where a
+ teletype font is unavailable, an alternative representation may be
+ used.
+
+<span class="h4"><a name="section-5.7.3">5.7.3</a>. Anchor: A</span>
+
+ The &lt;A&gt; element indicates a hyperlink anchor (see 7, "Hyperlinks").
+ At least one of the NAME and HREF attributes should be present.
+ Attributes of the &lt;A&gt; element:
+
+ HREF
+ gives the URI of the head anchor of a hyperlink.
+
+ NAME
+ gives the name of the anchor, and makes it available as
+ a head of a hyperlink.
+
+ TITLE
+ suggests a title for the destination resource --
+ advisory only. The TITLE attribute may be used:
+
+ * for display prior to accessing the destination
+ resource, for example, as a margin note or on a
+ small box while the mouse is over the anchor, or
+ while the document is being loaded;
+
+ * for resources that do not include a title, such as
+ graphics, plain text and Gopher menus, for use as a
+ window title.
+
+ REL
+ The REL attribute gives the relationship(s) described by
+ the hyperlink. The value is a whitespace separated list
+ of relationship names. The semantics of link
+ relationships are not specified in this document.
+
+ REV
+ same as the REL attribute, but the semantics of the
+ relationship are in the reverse direction. A link from A
+ to B with REL="X" expresses the same relationship as a
+ link from B to A with REV="X". An anchor may have both
+ REL and REV attributes.
+
+ URN
+ specifies a preferred, more persistent identifier for
+ the head anchor of the hyperlink. The syntax and
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 33]</span>
+<a name="page-34" id="page-34" href="#page-34" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ semantics of the URN attribute are not yet specified.
+
+ METHODS
+ specifies methods to be used in accessing the
+ destination, as a whitespace-separated list of names.
+ The set of applicable names is a function of the scheme
+ of the URI in the HREF attribute. For similar reasons as
+ for the TITLE attribute, it may be useful to include the
+ information in advance in the link. For example, the
+ HTML user agent may choose a different rendering as a
+ function of the methods allowed; for example, something
+ that is searchable may get a different icon.
+
+<span class="h3"><a name="section-5.8">5.8</a>. Line Break: BR</span>
+
+ The &lt;BR&gt; element specifies a line break between words (see 6,
+ "Characters, Words, and Paragraphs"). For example:
+
+ &lt;P&gt; Pease porridge hot&lt;BR&gt;
+ Pease porridge cold&lt;BR&gt;
+ Pease porridge in the pot&lt;BR&gt;
+ Nine days old.
+
+<span class="h3"><a name="section-5.9">5.9</a>. Horizontal Rule: HR</span>
+
+ The &lt;HR&gt; element is a divider between sections of text; typically a
+ full width horizontal rule or equivalent graphic. For example:
+
+ &lt;HR&gt;
+ &lt;ADDRESS&gt;February 8, 1995, CERN&lt;/ADDRESS&gt;
+ &lt;/BODY&gt;
+
+<span class="h3"><a name="section-5.10">5.10</a>. Image: IMG</span>
+
+ The &lt;IMG&gt; element refers to an image or icon via a hyperlink (see
+ 7.3, "Simultaneous Presentation of Image Resources").
+
+ HTML user agents may process the value of the ALT attribute as an
+ alternative to processing the image resource indicated by the SRC
+ attribute.
+
+ NOTE - Some HTML user agents can process graphics linked via
+ anchors, but not &lt;IMG&gt; graphics. If a graphic is essential, it
+ should be referenced from an &lt;A&gt; element rather than an &lt;IMG&gt;
+ element. If the graphic is not essential, then the &lt;IMG&gt; element
+ is appropriate.
+
+ Attributes of the &lt;IMG&gt; element:
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 34]</span>
+<a name="page-35" id="page-35" href="#page-35" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ ALIGN
+ alignment of the image with respect to the text
+ baseline.
+
+ * `TOP' specifies that the top of the image aligns
+ with the tallest item on the line containing the
+ image.
+
+ * `MIDDLE' specifies that the center of the image
+ aligns with the baseline of the line containing the
+ image.
+
+ * `BOTTOM' specifies that the bottom of the image
+ aligns with the baseline of the line containing the
+ image.
+
+ ALT
+ text to use in place of the referenced image resource,
+ for example due to processing constraints or user
+ preference.
+
+ ISMAP
+ indicates an image map (see 7.6, "Image Maps").
+
+ SRC
+ specifies the URI of the image resource.
+
+ NOTE - In practice, the media types of image
+ resources are limited to a few raster graphic
+ formats: typically `image/gif', `image/jpeg'. In
+ particular, `text/html' resources are not
+ intended to be used as image resources.
+
+ Examples of use:
+
+ &lt;IMG SRC="triangle.xbm" ALT="Warning:"&gt; Be sure
+ to read these instructions.
+
+ &lt;a href="http://machine/htbin/imagemap/sample"&gt;
+ &lt;IMG SRC="sample.xbm" ISMAP&gt;
+ &lt;/a&gt;
+
+<span class="h2"><a name="section-6">6</a>. Characters, Words, and Paragraphs</span>
+
+ An HTML user agent should present the body of an HTML document as a
+ collection of typeset paragraphs and preformatted text. Except for
+ preformatted elements (&lt;PRE&gt;, &lt;XMP&gt;, &lt;LISTING&gt;, &lt;TEXTAREA&gt;), each
+ block structuring element is regarded as a paragraph by taking the
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 35]</span>
+<a name="page-36" id="page-36" href="#page-36" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ data characters in its content and the content of its descendant
+ elements, concatenating them, and splitting the result into words,
+ separated by space, tab, or record end characters (and perhaps hyphen
+ characters). The sequence of words is typeset as a paragraph by
+ breaking it into lines.
+
+<span class="h3"><a name="section-6.1">6.1</a>. The HTML Document Character Set</span>
+
+ The document character set specified in 9.5, "SGML Declaration for
+ HTML" must be supported by HTML user agents. It includes the graphic
+ characters of Latin Alphabet No. 1, or simply Latin-1. Latin-1
+ comprises 191 graphic characters, including the alphabets of most
+ Western European languages.
+
+ NOTE - Use of the non-breaking space and soft hyphen indicator
+ characters is discouraged because support for them is not widely
+ deployed.
+
+ NOTE - To support non-western writing systems, a larger character
+ repertoire will be specified in a future version of HTML. The
+ document character set will be [<a href="#ref-ISO-10646">ISO-10646</a>], or some subset that
+ agrees with [<a href="#ref-ISO-10646">ISO-10646</a>]; in particular, all numeric character
+ references must use code positions assigned by [<a href="#ref-ISO-10646">ISO-10646</a>].
+
+ In SGML applications, the use of control characters is limited in
+ order to maximize the chance of successful interchange over
+ heterogeneous networks and operating systems. In the HTML document
+ character set only three control characters are allowed: Horizontal
+ Tab, Carriage Return, and Line Feed (code positions 9, 13, and 10).
+
+ The HTML DTD references the Added Latin 1 entity set, to allow
+ mnemonic representation of selected Latin 1 characters using only the
+ widely supported ASCII character repertoire. For example:
+
+ Kurt G&amp;ouml;del was a famous logician and mathematician.
+
+ See 9.7.2, "ISO Latin 1 Character Entity Set" for a table of the
+ "Added Latin 1" entities, and 13, "The HTML Coded Character Set" for
+ a table of the code positions of [ISO 8859-1] and the control
+ characters in the HTML document character set.
+
+<span class="h2"><a name="section-7">7</a>. Hyperlinks</span>
+
+ In addition to general purpose elements such as paragraphs and lists,
+ HTML documents can express hyperlinks. An HTML user agent allows the
+ user to navigate these hyperlinks.
+
+
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 36]</span>
+<a name="page-37" id="page-37" href="#page-37" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ A hyperlink is a relationship between two anchors, called the head
+ and the tail of the hyperlink [DEXTER]. Anchors are identified by an
+ anchor address: an absolute Uniform Resource Identifier (URI),
+ optionally followed by a '#' and a sequence of characters called a
+ fragment identifier. For example:
+
+ <a href="http://www.w3.org/hypertext/WWW/TheProject.html">http://www.w3.org/hypertext/WWW/TheProject.html</a>
+ <a href="http://www.w3.org/hypertext/WWW/TheProject.html#z31">http://www.w3.org/hypertext/WWW/TheProject.html#z31</a>
+
+ In an anchor address, the URI refers to a resource; it may be used in
+ a variety of information retrieval protocols to obtain an entity that
+ represents the resource, such as an HTML document. The fragment
+ identifier, if present, refers to some view on, or portion of the
+ resource.
+
+ Each of the following markup constructs indicates the tail anchor of
+ a hyperlink or set of hyperlinks:
+
+ * &lt;A&gt; elements with HREF present.
+
+ * &lt;LINK&gt; elements.
+
+ * &lt;IMG&gt; elements.
+
+ * &lt;INPUT&gt; elements with the SRC attribute present.
+
+ * &lt;ISINDEX&gt; elements.
+
+ * &lt;FORM&gt; elements with `METHOD=GET'.
+
+ These markup constructs refer to head anchors by a URI, either
+ absolute or relative, or a fragment identifier, or both.
+
+ In the case of a relative URI, the absolute URI in the address of the
+ head anchor is the result of combining the relative URI with a base
+ absolute URI as in [<a href="#ref-RELURL" title='"Relative Uniform Resource Locators"'>RELURL</a>]. The base document is taken from the
+ document's &lt;BASE&gt; element, if present; else, it is determined as in
+ [<a href="#ref-RELURL" title='"Relative Uniform Resource Locators"'>RELURL</a>].
+
+<span class="h3"><a name="section-7.1">7.1</a>. Accessing Resources</span>
+
+ Once the address of the head anchor is determined, the user agent may
+ obtain a representation of the resource.
+
+ For example, if the base URI is `http://host/x/y.html' and the
+ document contains:
+
+ &lt;img src="../icons/abc.gif"&gt;
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 37]</span>
+<a name="page-38" id="page-38" href="#page-38" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ then the user agent uses the URI `http://host/icons/abc.gif' to
+ access the resource, as in [<a href="#ref-URL" title='"Uniform Resource Locators (URL)"'>URL</a>].
+
+<span class="h3"><a name="section-7.2">7.2</a>. Activation of Hyperlinks</span>
+
+ An HTML user agent allows the user to navigate the content of the
+ document and request activation of hyperlinks denoted by &lt;A&gt;
+ elements. HTML user agents should also allow activation of &lt;LINK&gt;
+ element hyperlinks.
+
+ To activate a link, the user agent obtains a representation of the
+ resource identified in the address of the head anchor. If the
+ representation is another HTML document, navigation may begin again
+ with this new document.
+
+<span class="h3"><a name="section-7.3">7.3</a>. Simultaneous Presentation of Image Resources</span>
+
+ An HTML user agent may activate hyperlinks indicated by &lt;IMG&gt; and
+ &lt;INPUT&gt; elements concurrently with processing the document; that is,
+ image hyperlinks may be processed without explicit request by the
+ user. Image resources should be embedded in the presentation at the
+ point of the tail anchor, that is the &lt;IMG&gt; or &lt;INPUT&gt; element.
+
+ &lt;LINK&gt; hyperlinks may also be processed without explicit user
+ request; for example, style sheet resources may be processed before
+ or during the processing of the document.
+
+<span class="h3"><a name="section-7.4">7.4</a>. Fragment Identifiers</span>
+
+ Any characters following a `#' character in a hypertext address
+ constitute a fragment identifier. In particular, an address of the
+ form `#fragment' refers to an anchor in the same document.
+
+ The meaning of fragment identifiers depends on the media type of the
+ representation of the anchor's resource. For `text/html'
+ representations, it refers to the &lt;A&gt; element with a NAME attribute
+ whose value is the same as the fragment identifier. The matching is
+ case sensitive. The document should have exactly one such element.
+ The user agent should indicate the anchor element, for example by
+ scrolling to and/or highlighting the phrase.
+
+ For example, if the base URI is `http://host/x/y.html' and the user
+ activated the link denoted by the following markup:
+
+ &lt;p&gt; See: &lt;a href="app1.html#bananas"&gt;appendix 1&lt;/a&gt;
+ for more detail on bananas.
+
+
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 38]</span>
+<a name="page-39" id="page-39" href="#page-39" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ Then the user agent accesses the resource identified by
+ `http://host/x/app1.html'. Assuming the resource is represented using
+ the `text/html' media type, the user agent must locate the &lt;A&gt;
+ element whose NAME attribute is `bananas' and begin navigation there.
+
+<span class="h3"><a name="section-7.5">7.5</a>. Queries and Indexes</span>
+
+ The &lt;ISINDEX&gt; element represents a set of hyperlinks. The user can
+ choose from the set by providing keywords to the user agent. The
+ user agent computes the head URI by appending `?' and the keywords to
+ the base URI. The keywords are escaped according to [<a href="#ref-URL" title='"Uniform Resource Locators (URL)"'>URL</a>] and joined
+ by `+'. For example, if a document contains:
+
+ &lt;BASE HREF="http://host/index"&gt;
+ &lt;ISINDEX&gt;
+
+ and the user provides the keywords `apple' and `berry', then the
+ user agent must access the resource
+ `http://host/index?apple+berry'.
+
+ &lt;FORM&gt; elements with `METHOD=GET' also represent sets of
+ hyperlinks. See 8.2.2, "Query Forms: METHOD=GET" for details.
+
+<span class="h3"><a name="section-7.6">7.6</a>. Image Maps</span>
+
+ If the ISMAP attribute is present on an &lt;IMG&gt; element, the &lt;IMG&gt;
+ element must be contained in an &lt;A&gt; element with an HREF present.
+ This construct represents a set of hyperlinks. The user can choose
+ from the set by choosing a pixel of the image. The user agent
+ computes the head URI by appending `?' and the x and y coordinates of
+ the pixel to the URI given in the &lt;A&gt; element. For example, if a
+ document contains:
+
+ &lt;!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN"&gt;
+ &lt;head&gt;&lt;title&gt;ImageMap Example&lt;/title&gt;
+ &lt;BASE HREF="http://host/index"&gt;&lt;/head&gt;
+ &lt;body&gt;
+ &lt;p&gt; Choose any of these icons:&lt;br&gt;
+ &lt;a href="/cgi-bin/imagemap"&gt;&lt;img ismap src="icons.gif"&gt;&lt;/a&gt;
+
+ and the user chooses the upper-leftmost pixel, the chosen
+ hyperlink is the one with the URI
+ `http://host/cgi-bin/imagemap?0,0'.
+
+
+
+
+
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 39]</span>
+<a name="page-40" id="page-40" href="#page-40" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+<span class="h2"><a name="section-8">8</a>. Forms</span>
+
+ A form is a template for a form data set and an associated
+ method and action URI. A form data set is a sequence of
+ name/value pair fields. The names are specified on the NAME
+ attributes of form input elements, and the values are given
+ initial values by various forms of markup and edited by the
+ user. The resulting form data set is used to access an
+ information service as a function of the action and method.
+
+ Forms elements can be mixed in with document structuring
+ elements. For example, a &lt;PRE&gt; element may contain a &lt;FORM&gt;
+ element, or a &lt;FORM&gt; element may contain lists which contain
+ &lt;INPUT&gt; elements. This gives considerable flexibility in
+ designing the layout of forms.
+
+ Form processing is a level 2 feature.
+
+<span class="h3"><a name="section-8.1">8.1</a>. Form Elements</span>
+
+<span class="h4"><a name="section-8.1.1">8.1.1</a>. Form: FORM</span>
+
+ The &lt;FORM&gt; element contains a sequence of input elements, along
+ with document structuring elements. The attributes are:
+
+ ACTION
+ specifies the action URI for the form. The action URI of
+ a form defaults to the base URI of the document (see 7,
+ "Hyperlinks").
+
+ METHOD
+ selects a method of accessing the action URI. The set of
+ applicable methods is a function of the scheme of the
+ action URI of the form. See 8.2.2, "Query Forms:
+ METHOD=GET" and 8.2.3, "Forms with Side-Effects:
+ METHOD=POST".
+
+ ENCTYPE
+ specifies the media type used to encode the name/value
+ pairs for transport, in case the protocol does not
+ itself impose a format. See 8.2.1, "The form-urlencoded
+ Media Type".
+
+<span class="h4"><a name="section-8.1.2">8.1.2</a>. Input Field: INPUT</span>
+
+ The &lt;INPUT&gt; element represents a field for user input. The TYPE
+ attribute discriminates between several variations of fields.
+
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 40]</span>
+<a name="page-41" id="page-41" href="#page-41" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ The &lt;INPUT&gt; element has a number of attributes. The set of applicable
+ attributes depends on the value of the TYPE attribute.
+
+<span class="h5"><a name="section-8.1.2.1">8.1.2.1</a>. Text Field: INPUT TYPE=TEXT</span>
+
+ The default value of the TYPE attribute is `TEXT', indicating a
+ single line text entry field. (Use the &lt;TEXTAREA&gt; element for multi-
+ line text fields.)
+
+ Required attributes are:
+
+ NAME
+ name for the form field corresponding to this element.
+
+ The optional attributes are:
+
+ MAXLENGTH
+ constrains the number of characters that can be entered
+ into a text input field. If the value of MAXLENGTH is
+ greater than the value of the SIZE attribute, the field
+ should scroll appropriately. The default number of
+ characters is unlimited.
+
+ SIZE
+ specifies the amount of display space allocated to this
+ input field according to its type. The default depends
+ on the user agent.
+
+ VALUE
+ The initial value of the field.
+
+ For example:
+
+&lt;p&gt;Street Address: &lt;input name=street&gt;&lt;br&gt;
+Postal City code: &lt;input name=city size=16 maxlength=16&gt;&lt;br&gt;
+Zip Code: &lt;input name=zip size=10 maxlength=10 value="99999-9999"&gt;&lt;br&gt;
+
+<span class="h5"><a name="section-8.1.2.2">8.1.2.2</a>. Password Field: INPUT TYPE=PASSWORD</span>
+
+ An &lt;INPUT&gt; element with `TYPE=PASSWORD' is a text field as above,
+ except that the value is obscured as it is entered. (see also: 10,
+ "Security Considerations").
+
+ For example:
+
+&lt;p&gt;Name: &lt;input name=login&gt; Password: &lt;input type=password name=passwd&gt;
+
+
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 41]</span>
+<a name="page-42" id="page-42" href="#page-42" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+<span class="h5"><a name="section-8.1.2.3">8.1.2.3</a>. Check Box: INPUT TYPE=CHECKBOX</span>
+
+ An &lt;INPUT&gt; element with `TYPE=CHECKBOX' represents a boolean choice.
+ A set of such elements with the same name represents an n-of-many
+ choice field. Required attributes are:
+
+ NAME
+ symbolic name for the form field corresponding to this
+ element or group of elements.
+
+ VALUE
+ The portion of the value of the field contributed by
+ this element.
+
+ Optional attributes are:
+
+ CHECKED
+ indicates that the initial state is on.
+
+ For example:
+
+ &lt;p&gt;What flavors do you like?
+ &lt;input type=checkbox name=flavor value=vanilla&gt;Vanilla&lt;br&gt;
+ &lt;input type=checkbox name=flavor value=strawberry&gt;Strawberry&lt;br&gt;
+ &lt;input type=checkbox name=flavor value=chocolate checked&gt;Chocolate&lt;br&gt;
+
+<span class="h5"><a name="section-8.1.2.4">8.1.2.4</a>. Radio Button: INPUT TYPE=RADIO</span>
+
+ An &lt;INPUT&gt; element with `TYPE=RADIO' represents a boolean choice. A
+ set of such elements with the same name represents a 1-of-many choice
+ field. The NAME and VALUE attributes are required as for check boxes.
+ Optional attributes are:
+
+ CHECKED
+ indicates that the initial state is on.
+ At all times, exactly one of the radio buttons in a set is checked.
+ If none of the &lt;INPUT&gt; elements of a set of radio buttons specifies
+ `CHECKED', then the user agent must check the first radio button of
+ the set initially.
+
+ For example:
+
+ &lt;p&gt;Which is your favorite?
+ &lt;input type=radio name=flavor value=vanilla&gt;Vanilla&lt;br&gt;
+ &lt;input type=radio name=flavor value=strawberry&gt;Strawberry&lt;br&gt;
+ &lt;input type=radio name=flavor value=chocolate&gt;Chocolate&lt;br&gt;
+
+
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 42]</span>
+<a name="page-43" id="page-43" href="#page-43" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+<span class="h5"><a name="section-8.1.2.5">8.1.2.5</a>. Image Pixel: INPUT TYPE=IMAGE</span>
+
+ An &lt;INPUT&gt; element with `TYPE=IMAGE' specifies an image resource to
+ display, and allows input of two form fields: the x and y coordinate
+ of a pixel chosen from the image. The names of the fields are the
+ name of the field with `.x' and `.y' appended. `TYPE=IMAGE' implies
+ `TYPE=SUBMIT' processing; that is, when a pixel is chosen, the form
+ as a whole is submitted.
+
+ The NAME attribute is required as for other input fields. The SRC
+ attribute is required and the ALIGN is optional as for the &lt;IMG&gt;
+ element (see 5.10, "Image: IMG").
+
+ For example:
+
+ &lt;p&gt;Choose a point on the map:
+ &lt;input type=image name=point src="map.gif"&gt;
+
+<span class="h5"><a name="section-8.1.2.6">8.1.2.6</a>. Hidden Field: INPUT TYPE=HIDDEN</span>
+
+ An &lt;INPUT&gt; element with `TYPE=HIDDEN' represents a hidden field. The
+ user does not interact with this field; instead, the VALUE attribute
+ specifies the value of the field. The NAME and VALUE attributes are
+ required.
+
+ For example:
+
+ &lt;input type=hidden name=context value="l2k3j4l2k3j4l2k3j4lk23"&gt;
+
+<span class="h5"><a name="section-8.1.2.7">8.1.2.7</a>. Submit Button: INPUT TYPE=SUBMIT</span>
+
+ An &lt;INPUT&gt; element with `TYPE=SUBMIT' represents an input option,
+ typically a button, that instructs the user agent to submit the form.
+ Optional attributes are:
+
+ NAME
+ indicates that this element contributes a form field
+ whose value is given by the VALUE attribute. If the NAME
+ attribute is not present, this element does not
+ contribute a form field.
+
+ VALUE
+ indicates a label for the input (button).
+
+ You may submit this request internally:
+ &lt;input type=submit name=recipient value=internal&gt;&lt;br&gt;
+ or to the external world:
+ &lt;input type=submit name=recipient value=world&gt;
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 43]</span>
+<a name="page-44" id="page-44" href="#page-44" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+<span class="h5"><a name="section-8.1.2.8">8.1.2.8</a>. Reset Button: INPUT TYPE=RESET</span>
+
+ An &lt;INPUT&gt; element with `TYPE=RESET' represents an input option,
+ typically a button, that instructs the user agent to reset the form's
+ fields to their initial states. The VALUE attribute, if present,
+ indicates a label for the input (button).
+
+ When you are finished, you may submit this request:
+ &lt;input type=submit&gt;&lt;br&gt;
+ You may clear the form and start over at any time: &lt;input type=reset&gt;
+
+<span class="h4"><a name="section-8.1.3">8.1.3</a>. Selection: SELECT</span>
+
+ The &lt;SELECT&gt; element constrains the form field to an enumerated list
+ of values. The values are given in &lt;OPTION&gt; elements. Attributes
+ are:
+
+ MULTIPLE
+ indicates that more than one option may be included in
+ the value.
+
+ NAME
+ specifies the name of the form field.
+
+ SIZE
+ specifies the number of visible items. Select fields of
+ size one are typically pop-down menus, whereas select
+ fields with size greater than one are typically lists.
+
+ For example:
+
+ &lt;SELECT NAME="flavor"&gt;
+ &lt;OPTION&gt;Vanilla
+ &lt;OPTION&gt;Strawberry
+ &lt;OPTION value="RumRasin"&gt;Rum and Raisin
+ &lt;OPTION selected&gt;Peach and Orange
+ &lt;/SELECT&gt;
+
+ The initial state has the first option selected, unless a SELECTED
+ attribute is present on any of the &lt;OPTION&gt; elements.
+
+<span class="h5"><a name="section-8.1.3.1">8.1.3.1</a>. Option: OPTION</span>
+
+ The Option element can only occur within a Select element. It
+ represents one choice, and has the following attributes:
+
+ SELECTED
+ Indicates that this option is initially selected.
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 44]</span>
+<a name="page-45" id="page-45" href="#page-45" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ VALUE
+ indicates the value to be returned if this option is
+ chosen. The field value defaults to the content of the
+ &lt;OPTION&gt; element.
+
+ The content of the &lt;OPTION&gt; element is presented to the user to
+ represent the option. It is used as a returned value if the VALUE
+ attribute is not present.
+
+<span class="h4"><a name="section-8.1.4">8.1.4</a>. Text Area: TEXTAREA</span>
+
+ The &lt;TEXTAREA&gt; element represents a multi-line text field.
+ Attributes are:
+
+ COLS
+ the number of visible columns to display for the text
+ area, in characters.
+
+ NAME
+ Specifies the name of the form field.
+
+ ROWS
+ The number of visible rows to display for the text area,
+ in characters.
+
+ For example:
+
+ &lt;TEXTAREA NAME="address" ROWS=6 COLS=64&gt;
+ HaL Computer Systems
+ 1315 Dell Avenue
+ Campbell, California 95008
+ &lt;/TEXTAREA&gt;
+
+ The content of the &lt;TEXTAREA&gt; element is the field's initial value.
+
+ Typically, the ROWS and COLS attributes determine the visible
+ dimension of the field in characters. The field is typically rendered
+ in a fixed-width font. HTML user agents should allow text to extend
+ beyond these limits by scrolling as needed.
+
+<span class="h3"><a name="section-8.2">8.2</a>. Form Submission</span>
+
+ An HTML user agent begins processing a form by presenting the
+ document with the fields in their initial state. The user is allowed
+ to modify the fields, constrained by the field type etc. When the
+ user indicates that the form should be submitted (using a submit
+ button or image input), the form data set is processed according to
+ its method, action URI and enctype.
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 45]</span>
+<a name="page-46" id="page-46" href="#page-46" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ When there is only one single-line text input field in a form, the
+ user agent should accept Enter in that field as a request to submit
+ the form.
+
+<span class="h4"><a name="section-8.2.1">8.2.1</a>. The form-urlencoded Media Type</span>
+
+ The default encoding for all forms is `application/x-www-form-
+ urlencoded'. A form data set is represented in this media type as
+ follows:
+
+ 1. The form field names and values are escaped: space
+ characters are replaced by `+', and then reserved characters
+ are escaped as per [<a href="#ref-URL" title='"Uniform Resource Locators (URL)"'>URL</a>]; that is, non-alphanumeric
+ characters are replaced by `%HH', a percent sign and two
+ hexadecimal digits representing the ASCII code of the
+ character. Line breaks, as in multi-line text field values,
+ are represented as CR LF pairs, i.e. `%0D%0A'.
+
+ 2. The fields are listed in the order they appear in the
+ document with the name separated from the value by `=' and
+ the pairs separated from each other by `&amp;'. Fields with null
+ values may be omitted. In particular, unselected radio
+ buttons and checkboxes should not appear in the encoded
+ data, but hidden fields with VALUE attributes present
+ should.
+
+ NOTE - The URI from a query form submission can be
+ used in a normal anchor style hyperlink.
+ Unfortunately, the use of the `&amp;' character to
+ separate form fields interacts with its use in SGML
+ attribute values as an entity reference delimiter.
+ For example, the URI `http://host/?x=1&amp;y=2' must be
+ written `&lt;a href="http://host/?x=1&amp;#38;y=2"&gt;' or `&lt;a
+ href="http://host/?x=1&amp;amp;y=2"&gt;'.
+
+ HTTP server implementors, and in particular, CGI
+ implementors are encouraged to support the use of
+ `;' in place of `&amp;' to save users the trouble of
+ escaping `&amp;' characters this way.
+
+<span class="h4"><a name="section-8.2.2">8.2.2</a>. Query Forms: METHOD=GET</span>
+
+ If the processing of a form is idempotent (i.e. it has no lasting
+ observable effect on the state of the world), then the form method
+ should be `GET'. Many database searches have no visible side-effects
+ and make ideal applications of query forms.
+
+
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 46]</span>
+<a name="page-47" id="page-47" href="#page-47" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ To process a form whose action URL is an HTTP URL and whose method is
+ `GET', the user agent starts with the action URI and appends a `?'
+ and the form data set, in `application/x-www-form-urlencoded' format
+ as above. The user agent then traverses the link to this URI just as
+ if it were an anchor (see 7.2, "Activation of Hyperlinks").
+
+ NOTE - The URL encoding may result in very long URIs, which cause
+ some historical HTTP server implementations to exhibit defective
+ behavior. As a result, some HTML forms are written using
+ `METHOD=POST' even though the form submission has no side-effects.
+
+<span class="h4"><a name="section-8.2.3">8.2.3</a>. Forms with Side-Effects: METHOD=POST</span>
+
+ If the service associated with the processing of a form has side
+ effects (for example, modification of a database or subscription to a
+ service), the method should be `POST'.
+
+ To process a form whose action URL is an HTTP URL and whose method is
+ `POST', the user agent conducts an HTTP POST transaction using the
+ action URI, and a message body of type `application/x-www-form-
+ urlencoded' format as above. The user agent should display the
+ response from the HTTP POST interaction just as it would display the
+ response from an HTTP GET above.
+
+<span class="h4"><a name="section-8.2.4">8.2.4</a>. Example Form Submission: Questionnaire Form</span>
+
+ Consider the following document:
+
+ &lt;!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN"&gt;
+ &lt;title&gt;Sample of HTML Form Submission&lt;/title&gt;
+ &lt;H1&gt;Sample Questionnaire&lt;/H1&gt;
+ &lt;P&gt;Please fill out this questionnaire:
+ &lt;FORM METHOD="POST" ACTION="http://www.w3.org/sample"&gt;
+ &lt;P&gt;Your name: &lt;INPUT NAME="name" size="48"&gt;
+ &lt;P&gt;Male &lt;INPUT NAME="gender" TYPE=RADIO VALUE="male"&gt;
+ &lt;P&gt;Female &lt;INPUT NAME="gender" TYPE=RADIO VALUE="female"&gt;
+ &lt;P&gt;Number in family: &lt;INPUT NAME="family" TYPE=text&gt;
+ &lt;P&gt;Cities in which you maintain a residence:
+ &lt;UL&gt;
+ &lt;LI&gt;Kent &lt;INPUT NAME="city" TYPE=checkbox VALUE="kent"&gt;
+ &lt;LI&gt;Miami &lt;INPUT NAME="city" TYPE=checkbox VALUE="miami"&gt;
+ &lt;LI&gt;Other &lt;TEXTAREA NAME="other" cols=48 rows=4&gt;&lt;/textarea&gt;
+ &lt;/UL&gt;
+ Nickname: &lt;INPUT NAME="nickname" SIZE="42"&gt;
+ &lt;P&gt;Thank you for responding to this questionnaire.
+ &lt;P&gt;&lt;INPUT TYPE=SUBMIT&gt; &lt;INPUT TYPE=RESET&gt;
+ &lt;/FORM&gt;
+
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 47]</span>
+<a name="page-48" id="page-48" href="#page-48" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ The initial state of the form data set is:
+
+ name
+ ""
+
+ gender
+ "male"
+
+ family
+ ""
+
+ other
+ ""
+
+ nickname
+ ""
+
+ Note that the radio input has an initial value, while the
+ checkbox has none.
+
+ The user might edit the fields and request that the form be
+ submitted. At that point, suppose the values are:
+
+ name
+ "John Doe"
+
+ gender
+ "male"
+
+ family
+ "5"
+
+ city
+ "kent"
+
+ city
+ "miami"
+
+ other
+ "abc\ndef"
+
+ nickname
+ "J&amp;D"
+
+ The user agent then conducts an HTTP POST transaction using the URI
+ `http://www.w3.org/sample'. The message body would be (ignore the
+ line break):
+
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 48]</span>
+<a name="page-49" id="page-49" href="#page-49" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ name=John+Doe&amp;gender=male&amp;family=5&amp;city=kent&amp;city=miami&amp;
+ other=abc%0D%0Adef&amp;nickname=J%26D
+
+<span class="h2"><a name="section-9">9</a>. HTML Public Text</span>
+
+<span class="h3"><a name="section-9.1">9.1</a>. HTML DTD</span>
+
+ This is the Document Type Definition for the HyperText Markup
+ Language, level 2.
+
+&lt;!-- html.dtd
+
+ Document Type Definition for the HyperText Markup Language
+ (HTML DTD)
+
+ $Id: html.dtd,v 1.30 1995/09/21 23:30:19 connolly Exp $
+
+ Author: Daniel W. Connolly &lt;connolly@w3.org&gt;
+ See Also: html.decl, html-1.dtd
+ <a href="http://www.w3.org/hypertext/WWW/MarkUp/MarkUp.html">http://www.w3.org/hypertext/WWW/MarkUp/MarkUp.html</a>
+--&gt;
+
+&lt;!ENTITY % HTML.Version
+ "-//IETF//DTD HTML 2.0//EN"
+
+ -- Typical usage:
+
+ &lt;!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN"&gt;
+ &lt;html&gt;
+ ...
+ &lt;/html&gt;
+ --
+ &gt;
+
+
+&lt;!--============ Feature Test Entities ========================--&gt;
+
+&lt;!ENTITY % HTML.Recommended "IGNORE"
+ -- Certain features of the language are necessary for
+ compatibility with widespread usage, but they may
+ compromise the structural integrity of a document.
+ This feature test entity enables a more prescriptive
+ document type definition that eliminates
+ those features.
+ --&gt;
+
+&lt;![ %HTML.Recommended [
+ &lt;!ENTITY % HTML.Deprecated "IGNORE"&gt;
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 49]</span>
+<a name="page-50" id="page-50" href="#page-50" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+]]&gt;
+
+&lt;!ENTITY % HTML.Deprecated "INCLUDE"
+ -- Certain features of the language are necessary for
+ compatibility with earlier versions of the specification,
+ but they tend to be used and implemented inconsistently,
+ and their use is deprecated. This feature test entity
+ enables a document type definition that eliminates
+ these features.
+ --&gt;
+
+&lt;!ENTITY % HTML.Highlighting "INCLUDE"
+ -- Use this feature test entity to validate that a
+ document uses no highlighting tags, which may be
+ ignored on minimal implementations.
+ --&gt;
+
+&lt;!ENTITY % HTML.Forms "INCLUDE"
+ -- Use this feature test entity to validate that a document
+ contains no forms, which may not be supported in minimal
+ implementations
+ --&gt;
+
+&lt;!--============== Imported Names ==============================--&gt;
+
+&lt;!ENTITY % Content-Type "CDATA"
+ -- meaning an internet media type
+ (aka MIME content type, as per <a href="./rfc1521">RFC1521</a>)
+ --&gt;
+
+&lt;!ENTITY % HTTP-Method "GET | POST"
+ -- as per HTTP specification, in progress
+ --&gt;
+
+&lt;!--========= DTD "Macros" =====================--&gt;
+
+&lt;!ENTITY % heading "H1|H2|H3|H4|H5|H6"&gt;
+
+&lt;!ENTITY % list " UL | OL | DIR | MENU " &gt;
+
+
+&lt;!--======= Character mnemonic entities =================--&gt;
+
+&lt;!ENTITY % ISOlat1 PUBLIC
+ "ISO 8879-1986//ENTITIES Added Latin 1//EN//HTML"&gt;
+%ISOlat1;
+
+&lt;!ENTITY amp CDATA "&amp;#38;" -- ampersand --&gt;
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 50]</span>
+<a name="page-51" id="page-51" href="#page-51" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+&lt;!ENTITY gt CDATA "&amp;#62;" -- greater than --&gt;
+&lt;!ENTITY lt CDATA "&amp;#60;" -- less than --&gt;
+&lt;!ENTITY quot CDATA "&amp;#34;" -- double quote --&gt;
+
+
+&lt;!--========= SGML Document Access (SDA) Parameter Entities =====--&gt;
+
+&lt;!-- HTML 2.0 contains SGML Document Access (SDA) fixed attributes
+in support of easy transformation to the International Committee
+for Accessible Document Design (ICADD) DTD
+ "-//EC-USA-CDA/ICADD//DTD ICADD22//EN".
+ICADD applications are designed to support usable access to
+structured information by print-impaired individuals through
+Braille, large print and voice synthesis. For more information on
+SDA &amp; ICADD:
+ - ISO 12083:1993, Annex A.8, Facilities for Braille,
+ large print and computer voice
+ - ICADD ListServ
+ &lt;ICADD%ASUACAD.BITNET@ARIZVM1.ccit.arizona.edu&gt;
+ - Usenet news group bit.listserv.easi
+ - Recording for the Blind, +1 800 221 4792
+--&gt;
+
+&lt;!ENTITY % SDAFORM "SDAFORM CDATA #FIXED"
+ -- one to one mapping --&gt;
+&lt;!ENTITY % SDARULE "SDARULE CDATA #FIXED"
+ -- context-sensitive mapping --&gt;
+&lt;!ENTITY % SDAPREF "SDAPREF CDATA #FIXED"
+ -- generated text prefix --&gt;
+&lt;!ENTITY % SDASUFF "SDASUFF CDATA #FIXED"
+ -- generated text suffix --&gt;
+&lt;!ENTITY % SDASUSP "SDASUSP NAME #FIXED"
+ -- suspend transform process --&gt;
+
+
+&lt;!--========== Text Markup =====================--&gt;
+
+&lt;![ %HTML.Highlighting [
+
+&lt;!ENTITY % font " TT | B | I "&gt;
+
+&lt;!ENTITY % phrase "EM | STRONG | CODE | SAMP | KBD | VAR | CITE "&gt;
+
+&lt;!ENTITY % text "#PCDATA | A | IMG | BR | %phrase | %font"&gt;
+
+&lt;!ELEMENT (%font;|%phrase) - - (%text)*&gt;
+&lt;!ATTLIST ( TT | CODE | SAMP | KBD | VAR )
+ %SDAFORM; "Lit"
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 51]</span>
+<a name="page-52" id="page-52" href="#page-52" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ &gt;
+&lt;!ATTLIST ( B | STRONG )
+ %SDAFORM; "B"
+ &gt;
+&lt;!ATTLIST ( I | EM | CITE )
+ %SDAFORM; "It"
+ &gt;
+
+&lt;!-- &lt;TT&gt; Typewriter text --&gt;
+&lt;!-- &lt;B&gt; Bold text --&gt;
+&lt;!-- &lt;I&gt; Italic text --&gt;
+
+&lt;!-- &lt;EM&gt; Emphasized phrase --&gt;
+&lt;!-- &lt;STRONG&gt; Strong emphasis --&gt;
+&lt;!-- &lt;CODE&gt; Source code phrase --&gt;
+&lt;!-- &lt;SAMP&gt; Sample text or characters --&gt;
+&lt;!-- &lt;KBD&gt; Keyboard phrase, e.g. user input --&gt;
+&lt;!-- &lt;VAR&gt; Variable phrase or substitutable --&gt;
+&lt;!-- &lt;CITE&gt; Name or title of cited work --&gt;
+
+&lt;!ENTITY % pre.content "#PCDATA | A | HR | BR | %font | %phrase"&gt;
+
+]]&gt;
+
+&lt;!ENTITY % text "#PCDATA | A | IMG | BR"&gt;
+
+&lt;!ELEMENT BR - O EMPTY&gt;
+&lt;!ATTLIST BR
+ %SDAPREF; "&amp;#RE;"
+ &gt;
+
+&lt;!-- &lt;BR&gt; Line break --&gt;
+
+
+&lt;!--========= Link Markup ======================--&gt;
+
+&lt;!ENTITY % linkType "NAMES"&gt;
+
+&lt;!ENTITY % linkExtraAttributes
+ "REL %linkType #IMPLIED
+ REV %linkType #IMPLIED
+ URN CDATA #IMPLIED
+ TITLE CDATA #IMPLIED
+ METHODS NAMES #IMPLIED
+ "&gt;
+
+&lt;![ %HTML.Recommended [
+ &lt;!ENTITY % A.content "(%text)*"
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 52]</span>
+<a name="page-53" id="page-53" href="#page-53" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ -- &lt;H1&gt;&lt;a name="xxx"&gt;Heading&lt;/a&gt;&lt;/H1&gt;
+ is preferred to
+ &lt;a name="xxx"&gt;&lt;H1&gt;Heading&lt;/H1&gt;&lt;/a&gt;
+ --&gt;
+]]&gt;
+
+&lt;!ENTITY % A.content "(%heading|%text)*"&gt;
+
+&lt;!ELEMENT A - - %A.content -(A)&gt;
+&lt;!ATTLIST A
+ HREF CDATA #IMPLIED
+ NAME CDATA #IMPLIED
+ %linkExtraAttributes;
+ %SDAPREF; "&lt;Anchor: #AttList&gt;"
+ &gt;
+&lt;!-- &lt;A&gt; Anchor; source/destination of link --&gt;
+&lt;!-- &lt;A NAME="..."&gt; Name of this anchor --&gt;
+&lt;!-- &lt;A HREF="..."&gt; Address of link destination --&gt;
+&lt;!-- &lt;A URN="..."&gt; Permanent address of destination --&gt;
+&lt;!-- &lt;A REL=...&gt; Relationship to destination --&gt;
+&lt;!-- &lt;A REV=...&gt; Relationship of destination to this --&gt;
+&lt;!-- &lt;A TITLE="..."&gt; Title of destination (advisory) --&gt;
+&lt;!-- &lt;A METHODS="..."&gt; Operations on destination (advisory) --&gt;
+
+
+&lt;!--========== Images ==========================--&gt;
+
+&lt;!ELEMENT IMG - O EMPTY&gt;
+&lt;!ATTLIST IMG
+ SRC CDATA #REQUIRED
+ ALT CDATA #IMPLIED
+ ALIGN (top|middle|bottom) #IMPLIED
+ ISMAP (ISMAP) #IMPLIED
+ %SDAPREF; "&lt;Fig&gt;&lt;?SDATrans Img: #AttList&gt;#AttVal(Alt)&lt;/Fig&gt;"
+ &gt;
+
+&lt;!-- &lt;IMG&gt; Image; icon, glyph or illustration --&gt;
+&lt;!-- &lt;IMG SRC="..."&gt; Address of image object --&gt;
+&lt;!-- &lt;IMG ALT="..."&gt; Textual alternative --&gt;
+&lt;!-- &lt;IMG ALIGN=...&gt; Position relative to text --&gt;
+&lt;!-- &lt;IMG ISMAP&gt; Each pixel can be a link --&gt;
+
+&lt;!--========== Paragraphs=======================--&gt;
+
+&lt;!ELEMENT P - O (%text)*&gt;
+&lt;!ATTLIST P
+ %SDAFORM; "Para"
+ &gt;
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 53]</span>
+<a name="page-54" id="page-54" href="#page-54" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+&lt;!-- &lt;P&gt; Paragraph --&gt;
+
+
+&lt;!--========== Headings, Titles, Sections ===============--&gt;
+
+&lt;!ELEMENT HR - O EMPTY&gt;
+&lt;!ATTLIST HR
+ %SDAPREF; "&amp;#RE;&amp;#RE;"
+ &gt;
+
+&lt;!-- &lt;HR&gt; Horizontal rule --&gt;
+
+&lt;!ELEMENT ( %heading ) - - (%text;)*&gt;
+&lt;!ATTLIST H1
+ %SDAFORM; "H1"
+ &gt;
+&lt;!ATTLIST H2
+ %SDAFORM; "H2"
+ &gt;
+&lt;!ATTLIST H3
+ %SDAFORM; "H3"
+ &gt;
+&lt;!ATTLIST H4
+ %SDAFORM; "H4"
+ &gt;
+&lt;!ATTLIST H5
+ %SDAFORM; "H5"
+ &gt;
+&lt;!ATTLIST H6
+ %SDAFORM; "H6"
+ &gt;
+
+&lt;!-- &lt;H1&gt; Heading, level 1 --&gt;
+&lt;!-- &lt;H2&gt; Heading, level 2 --&gt;
+&lt;!-- &lt;H3&gt; Heading, level 3 --&gt;
+&lt;!-- &lt;H4&gt; Heading, level 4 --&gt;
+&lt;!-- &lt;H5&gt; Heading, level 5 --&gt;
+&lt;!-- &lt;H6&gt; Heading, level 6 --&gt;
+
+
+&lt;!--========== Text Flows ======================--&gt;
+
+&lt;![ %HTML.Forms [
+ &lt;!ENTITY % block.forms "BLOCKQUOTE | FORM | ISINDEX"&gt;
+]]&gt;
+
+&lt;!ENTITY % block.forms "BLOCKQUOTE"&gt;
+
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 54]</span>
+<a name="page-55" id="page-55" href="#page-55" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+&lt;![ %HTML.Deprecated [
+ &lt;!ENTITY % preformatted "PRE | XMP | LISTING"&gt;
+]]&gt;
+
+&lt;!ENTITY % preformatted "PRE"&gt;
+
+&lt;!ENTITY % block "P | %list | DL
+ | %preformatted
+ | %block.forms"&gt;
+
+&lt;!ENTITY % flow "(%text|%block)*"&gt;
+
+&lt;!ENTITY % pre.content "#PCDATA | A | HR | BR"&gt;
+&lt;!ELEMENT PRE - - (%pre.content)*&gt;
+&lt;!ATTLIST PRE
+ WIDTH NUMBER #implied
+ %SDAFORM; "Lit"
+ &gt;
+
+&lt;!-- &lt;PRE&gt; Preformatted text --&gt;
+&lt;!-- &lt;PRE WIDTH=...&gt; Maximum characters per line --&gt;
+
+&lt;![ %HTML.Deprecated [
+
+&lt;!ENTITY % literal "CDATA"
+ -- historical, non-conforming parsing mode where
+ the only markup signal is the end tag
+ in full
+ --&gt;
+
+&lt;!ELEMENT (XMP|LISTING) - - %literal&gt;
+&lt;!ATTLIST XMP
+ %SDAFORM; "Lit"
+ %SDAPREF; "Example:&amp;#RE;"
+ &gt;
+&lt;!ATTLIST LISTING
+ %SDAFORM; "Lit"
+ %SDAPREF; "Listing:&amp;#RE;"
+ &gt;
+
+&lt;!-- &lt;XMP&gt; Example section --&gt;
+&lt;!-- &lt;LISTING&gt; Computer listing --&gt;
+
+&lt;!ELEMENT PLAINTEXT - O %literal&gt;
+&lt;!-- &lt;PLAINTEXT&gt; Plain text passage --&gt;
+
+&lt;!ATTLIST PLAINTEXT
+ %SDAFORM; "Lit"
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 55]</span>
+<a name="page-56" id="page-56" href="#page-56" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ &gt;
+]]&gt;
+
+&lt;!--========== Lists ==================--&gt;
+
+&lt;!ELEMENT DL - - (DT | DD)+&gt;
+&lt;!ATTLIST DL
+ COMPACT (COMPACT) #IMPLIED
+ %SDAFORM; "List"
+ %SDAPREF; "Definition List:"
+ &gt;
+
+&lt;!ELEMENT DT - O (%text)*&gt;
+&lt;!ATTLIST DT
+ %SDAFORM; "Term"
+ &gt;
+
+&lt;!ELEMENT DD - O %flow&gt;
+&lt;!ATTLIST DD
+ %SDAFORM; "LItem"
+ &gt;
+
+&lt;!-- &lt;DL&gt; Definition list, or glossary --&gt;
+&lt;!-- &lt;DL COMPACT&gt; Compact style list --&gt;
+&lt;!-- &lt;DT&gt; Term in definition list --&gt;
+&lt;!-- &lt;DD&gt; Definition of term --&gt;
+
+&lt;!ELEMENT (OL|UL) - - (LI)+&gt;
+&lt;!ATTLIST OL
+ COMPACT (COMPACT) #IMPLIED
+ %SDAFORM; "List"
+ &gt;
+&lt;!ATTLIST UL
+ COMPACT (COMPACT) #IMPLIED
+ %SDAFORM; "List"
+ &gt;
+&lt;!-- &lt;UL&gt; Unordered list --&gt;
+&lt;!-- &lt;UL COMPACT&gt; Compact list style --&gt;
+&lt;!-- &lt;OL&gt; Ordered, or numbered list --&gt;
+&lt;!-- &lt;OL COMPACT&gt; Compact list style --&gt;
+
+
+&lt;!ELEMENT (DIR|MENU) - - (LI)+ -(%block)&gt;
+&lt;!ATTLIST DIR
+ COMPACT (COMPACT) #IMPLIED
+ %SDAFORM; "List"
+ %SDAPREF; "&lt;LHead&gt;Directory&lt;/LHead&gt;"
+ &gt;
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 56]</span>
+<a name="page-57" id="page-57" href="#page-57" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+&lt;!ATTLIST MENU
+ COMPACT (COMPACT) #IMPLIED
+ %SDAFORM; "List"
+ %SDAPREF; "&lt;LHead&gt;Menu&lt;/LHead&gt;"
+ &gt;
+
+&lt;!-- &lt;DIR&gt; Directory list --&gt;
+&lt;!-- &lt;DIR COMPACT&gt; Compact list style --&gt;
+&lt;!-- &lt;MENU&gt; Menu list --&gt;
+&lt;!-- &lt;MENU COMPACT&gt; Compact list style --&gt;
+
+&lt;!ELEMENT LI - O %flow&gt;
+&lt;!ATTLIST LI
+ %SDAFORM; "LItem"
+ &gt;
+
+&lt;!-- &lt;LI&gt; List item --&gt;
+
+&lt;!--========== Document Body ===================--&gt;
+
+&lt;![ %HTML.Recommended [
+ &lt;!ENTITY % body.content "(%heading|%block|HR|ADDRESS|IMG)*"
+ -- &lt;h1&gt;Heading&lt;/h1&gt;
+ &lt;p&gt;Text ...
+ is preferred to
+ &lt;h1&gt;Heading&lt;/h1&gt;
+ Text ...
+ --&gt;
+]]&gt;
+
+&lt;!ENTITY % body.content "(%heading | %text | %block |
+ HR | ADDRESS)*"&gt;
+
+&lt;!ELEMENT BODY O O %body.content&gt;
+
+&lt;!-- &lt;BODY&gt; Document body --&gt;
+
+&lt;!ELEMENT BLOCKQUOTE - - %body.content&gt;
+&lt;!ATTLIST BLOCKQUOTE
+ %SDAFORM; "BQ"
+ &gt;
+
+&lt;!-- &lt;BLOCKQUOTE&gt; Quoted passage --&gt;
+
+&lt;!ELEMENT ADDRESS - - (%text|P)*&gt;
+&lt;!ATTLIST ADDRESS
+ %SDAFORM; "Lit"
+ %SDAPREF; "Address:&amp;#RE;"
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 57]</span>
+<a name="page-58" id="page-58" href="#page-58" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ &gt;
+
+&lt;!-- &lt;ADDRESS&gt; Address, signature, or byline --&gt;
+
+
+&lt;!--======= Forms ====================--&gt;
+
+&lt;![ %HTML.Forms [
+
+&lt;!ELEMENT FORM - - %body.content -(FORM) +(INPUT|SELECT|TEXTAREA)&gt;
+&lt;!ATTLIST FORM
+ ACTION CDATA #IMPLIED
+ METHOD (%HTTP-Method) GET
+ ENCTYPE %Content-Type; "application/x-www-form-urlencoded"
+ %SDAPREF; "&lt;Para&gt;Form:&lt;/Para&gt;"
+ %SDASUFF; "&lt;Para&gt;Form End.&lt;/Para&gt;"
+ &gt;
+
+&lt;!-- &lt;FORM&gt; Fill-out or data-entry form --&gt;
+&lt;!-- &lt;FORM ACTION="..."&gt; Address for completed form --&gt;
+&lt;!-- &lt;FORM METHOD=...&gt; Method of submitting form --&gt;
+&lt;!-- &lt;FORM ENCTYPE="..."&gt; Representation of form data --&gt;
+
+&lt;!ENTITY % InputType "(TEXT | PASSWORD | CHECKBOX |
+ RADIO | SUBMIT | RESET |
+ IMAGE | HIDDEN )"&gt;
+&lt;!ELEMENT INPUT - O EMPTY&gt;
+&lt;!ATTLIST INPUT
+ TYPE %InputType TEXT
+ NAME CDATA #IMPLIED
+ VALUE CDATA #IMPLIED
+ SRC CDATA #IMPLIED
+ CHECKED (CHECKED) #IMPLIED
+ SIZE CDATA #IMPLIED
+ MAXLENGTH NUMBER #IMPLIED
+ ALIGN (top|middle|bottom) #IMPLIED
+ %SDAPREF; "Input: "
+ &gt;
+
+&lt;!-- &lt;INPUT&gt; Form input datum --&gt;
+&lt;!-- &lt;INPUT TYPE=...&gt; Type of input interaction --&gt;
+&lt;!-- &lt;INPUT NAME=...&gt; Name of form datum --&gt;
+&lt;!-- &lt;INPUT VALUE="..."&gt; Default/initial/selected value --&gt;
+&lt;!-- &lt;INPUT SRC="..."&gt; Address of image --&gt;
+&lt;!-- &lt;INPUT CHECKED&gt; Initial state is "on" --&gt;
+&lt;!-- &lt;INPUT SIZE=...&gt; Field size hint --&gt;
+&lt;!-- &lt;INPUT MAXLENGTH=...&gt; Data length maximum --&gt;
+&lt;!-- &lt;INPUT ALIGN=...&gt; Image alignment --&gt;
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 58]</span>
+<a name="page-59" id="page-59" href="#page-59" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+&lt;!ELEMENT SELECT - - (OPTION+) -(INPUT|SELECT|TEXTAREA)&gt;
+&lt;!ATTLIST SELECT
+ NAME CDATA #REQUIRED
+ SIZE NUMBER #IMPLIED
+ MULTIPLE (MULTIPLE) #IMPLIED
+ %SDAFORM; "List"
+ %SDAPREF;
+ "&lt;LHead&gt;Select #AttVal(Multiple)&lt;/LHead&gt;"
+ &gt;
+
+&lt;!-- &lt;SELECT&gt; Selection of option(s) --&gt;
+&lt;!-- &lt;SELECT NAME=...&gt; Name of form datum --&gt;
+&lt;!-- &lt;SELECT SIZE=...&gt; Options displayed at a time --&gt;
+&lt;!-- &lt;SELECT MULTIPLE&gt; Multiple selections allowed --&gt;
+
+&lt;!ELEMENT OPTION - O (#PCDATA)*&gt;
+&lt;!ATTLIST OPTION
+ SELECTED (SELECTED) #IMPLIED
+ VALUE CDATA #IMPLIED
+ %SDAFORM; "LItem"
+ %SDAPREF;
+ "Option: #AttVal(Value) #AttVal(Selected)"
+ &gt;
+
+&lt;!-- &lt;OPTION&gt; A selection option --&gt;
+&lt;!-- &lt;OPTION SELECTED&gt; Initial state --&gt;
+&lt;!-- &lt;OPTION VALUE="..."&gt; Form datum value for this option--&gt;
+
+&lt;!ELEMENT TEXTAREA - - (#PCDATA)* -(INPUT|SELECT|TEXTAREA)&gt;
+&lt;!ATTLIST TEXTAREA
+ NAME CDATA #REQUIRED
+ ROWS NUMBER #REQUIRED
+ COLS NUMBER #REQUIRED
+ %SDAFORM; "Para"
+ %SDAPREF; "Input Text -- #AttVal(Name): "
+ &gt;
+
+&lt;!-- &lt;TEXTAREA&gt; An area for text input --&gt;
+&lt;!-- &lt;TEXTAREA NAME=...&gt; Name of form datum --&gt;
+&lt;!-- &lt;TEXTAREA ROWS=...&gt; Height of area --&gt;
+&lt;!-- &lt;TEXTAREA COLS=...&gt; Width of area --&gt;
+
+]]&gt;
+
+
+&lt;!--======= Document Head ======================--&gt;
+
+&lt;![ %HTML.Recommended [
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 59]</span>
+<a name="page-60" id="page-60" href="#page-60" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ &lt;!ENTITY % head.extra ""&gt;
+]]&gt;
+&lt;!ENTITY % head.extra "&amp; NEXTID?"&gt;
+
+&lt;!ENTITY % head.content "TITLE &amp; ISINDEX? &amp; BASE? %head.extra"&gt;
+
+&lt;!ELEMENT HEAD O O (%head.content) +(META|LINK)&gt;
+
+&lt;!-- &lt;HEAD&gt; Document head --&gt;
+
+&lt;!ELEMENT TITLE - - (#PCDATA)* -(META|LINK)&gt;
+&lt;!ATTLIST TITLE
+ %SDAFORM; "Ti" &gt;
+
+&lt;!-- &lt;TITLE&gt; Title of document --&gt;
+
+&lt;!ELEMENT LINK - O EMPTY&gt;
+&lt;!ATTLIST LINK
+ HREF CDATA #REQUIRED
+ %linkExtraAttributes;
+ %SDAPREF; "Linked to : #AttVal (TITLE) (URN) (HREF)&gt;" &gt;
+
+&lt;!-- &lt;LINK&gt; Link from this document --&gt;
+&lt;!-- &lt;LINK HREF="..."&gt; Address of link destination --&gt;
+&lt;!-- &lt;LINK URN="..."&gt; Lasting name of destination --&gt;
+&lt;!-- &lt;LINK REL=...&gt; Relationship to destination --&gt;
+&lt;!-- &lt;LINK REV=...&gt; Relationship of destination to this --&gt;
+&lt;!-- &lt;LINK TITLE="..."&gt; Title of destination (advisory) --&gt;
+&lt;!-- &lt;LINK METHODS="..."&gt; Operations allowed (advisory) --&gt;
+
+&lt;!ELEMENT ISINDEX - O EMPTY&gt;
+&lt;!ATTLIST ISINDEX
+ %SDAPREF;
+ "&lt;Para&gt;[Document is indexed/searchable.]&lt;/Para&gt;"&gt;
+
+&lt;!-- &lt;ISINDEX&gt; Document is a searchable index --&gt;
+
+&lt;!ELEMENT BASE - O EMPTY&gt;
+&lt;!ATTLIST BASE
+ HREF CDATA #REQUIRED &gt;
+
+&lt;!-- &lt;BASE&gt; Base context document --&gt;
+&lt;!-- &lt;BASE HREF="..."&gt; Address for this document --&gt;
+
+&lt;!ELEMENT NEXTID - O EMPTY&gt;
+&lt;!ATTLIST NEXTID
+ N CDATA #REQUIRED &gt;
+
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 60]</span>
+<a name="page-61" id="page-61" href="#page-61" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+&lt;!-- &lt;NEXTID&gt; Next ID to use for link name --&gt;
+&lt;!-- &lt;NEXTID N=...&gt; Next ID to use for link name --&gt;
+
+&lt;!ELEMENT META - O EMPTY&gt;
+&lt;!ATTLIST META
+ HTTP-EQUIV NAME #IMPLIED
+ NAME NAME #IMPLIED
+ CONTENT CDATA #REQUIRED &gt;
+
+&lt;!-- &lt;META&gt; Generic Meta-information --&gt;
+&lt;!-- &lt;META HTTP-EQUIV=...&gt; HTTP response header name --&gt;
+&lt;!-- &lt;META NAME=...&gt; Meta-information name --&gt;
+&lt;!-- &lt;META CONTENT="..."&gt; Associated information --&gt;
+
+&lt;!--======= Document Structure =================--&gt;
+
+&lt;![ %HTML.Deprecated [
+ &lt;!ENTITY % html.content "HEAD, BODY, PLAINTEXT?"&gt;
+]]&gt;
+&lt;!ENTITY % html.content "HEAD, BODY"&gt;
+
+&lt;!ELEMENT HTML O O (%html.content)&gt;
+&lt;!ENTITY % version.attr "VERSION CDATA #FIXED '%HTML.Version;'"&gt;
+
+&lt;!ATTLIST HTML
+ %version.attr;
+ %SDAFORM; "Book"
+ &gt;
+
+&lt;!-- &lt;HTML&gt; HTML Document --&gt;
+
+<span class="h3"><a name="section-9.2">9.2</a>. Strict HTML DTD</span>
+
+ This document type declaration refers to the HTML DTD with the
+ `HTML.Recommended' entity defined as `INCLUDE' rather than `IGNORE';
+ that is, it refers to the more structurally rigid definition of HTML.
+
+&lt;!-- html-s.dtd
+
+ Document Type Definition for the HyperText Markup Language
+ with strict validation (HTML Strict DTD).
+
+ $Id: html-s.dtd,v 1.3 1995/06/02 18:55:46 connolly Exp $
+
+ Author: Daniel W. Connolly &lt;connolly@w3.org&gt;
+ See Also: <a href="http://www.w3.org/hypertext/WWW/MarkUp/MarkUp.html">http://www.w3.org/hypertext/WWW/MarkUp/MarkUp.html</a>
+--&gt;
+
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 61]</span>
+<a name="page-62" id="page-62" href="#page-62" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+&lt;!ENTITY % HTML.Version
+ "-//IETF//DTD HTML 2.0 Strict//EN"
+
+ -- Typical usage:
+
+ &lt;!DOCTYPE HTML PUBLIC
+ "-//IETF//DTD HTML Strict//EN"&gt;
+ &lt;html&gt;
+ ...
+ &lt;/html&gt;
+ --
+ &gt;
+
+&lt;!-- Feature Test Entities --&gt;
+&lt;!ENTITY % HTML.Recommended "INCLUDE"&gt;
+
+&lt;!ENTITY % html PUBLIC "-//IETF//DTD HTML 2.0//EN"&gt;
+%html;
+
+<span class="h3"><a name="section-9.3">9.3</a>. Level 1 HTML DTD</span>
+
+ This document type declaration refers to the HTML DTD with the
+ `HTML.Forms' entity defined as `IGNORE' rather than `INCLUDE'.
+ Documents which contain &lt;FORM&gt; elements do not conform to this DTD,
+ and must use the level 2 DTD.
+
+&lt;!-- html-1.dtd
+
+ Document Type Definition for the HyperText Markup Language
+ with Level 1 Extensions (HTML Level 1 DTD).
+
+ $Id: html-1.dtd,v 1.2 1995/03/29 18:53:10 connolly Exp $
+
+ Author: Daniel W. Connolly &lt;connolly@w3.org&gt;
+ See Also: <a href="http://info.cern.ch/hypertext/WWW/MarkUp/MarkUp.html">http://info.cern.ch/hypertext/WWW/MarkUp/MarkUp.html</a>
+
+--&gt;
+
+&lt;!ENTITY % HTML.Version
+ "-//IETF//DTD HTML 2.0 Level 1//EN"
+
+ -- Typical usage:
+
+ &lt;!DOCTYPE HTML PUBLIC
+ "-//IETF//DTD HTML Level 1//EN"&gt;
+ &lt;html&gt;
+ ...
+ &lt;/html&gt;
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 62]</span>
+<a name="page-63" id="page-63" href="#page-63" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ --
+ &gt;
+
+&lt;!-- Feature Test Entities --&gt;
+&lt;!ENTITY % HTML.Forms "IGNORE"&gt;
+
+&lt;!ENTITY % html PUBLIC "-//IETF//DTD HTML 2.0//EN"&gt;
+%html;
+
+<span class="h3"><a name="section-9.4">9.4</a>. Strict Level 1 HTML DTD</span>
+
+ This document type declaration refers to the level 1 HTML DTD with
+ the `HTML.Recommended' entity defined as `INCLUDE' rather than
+ `IGNORE'; that is, it refers to the more structurally rigid definition
+ of HTML.
+
+&lt;!-- html-1s.dtd
+
+ Document Type Definition for the HyperText Markup Language
+ Strict Level 1
+
+ $Id: html-1s.dtd,v 1.3 1995/06/02 18:55:43 connolly Exp $
+
+ Author: Daniel W. Connolly &lt;connolly@w3.org&gt;
+ See Also: <a href="http://www.w3.org/hypertext/WWW/MarkUp/MarkUp.html">http://www.w3.org/hypertext/WWW/MarkUp/MarkUp.html</a>
+--&gt;
+
+&lt;!ENTITY % HTML.Version
+ "-//IETF//DTD HTML 2.0 Strict Level 1//EN"
+
+ -- Typical usage:
+
+ &lt;!DOCTYPE HTML PUBLIC
+ "-//IETF//DTD HTML Strict Level 1//EN"&gt;
+ &lt;html&gt;
+ ...
+ &lt;/html&gt;
+ --
+ &gt;
+
+&lt;!-- Feature Test Entities --&gt;
+
+
+&lt;!ENTITY % HTML.Recommended "INCLUDE"&gt;
+
+&lt;!ENTITY % html-1 PUBLIC "-//IETF//DTD HTML 2.0 Level 1//EN"&gt;
+%html-1;
+
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 63]</span>
+<a name="page-64" id="page-64" href="#page-64" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+<span class="h3"><a name="section-9.5">9.5</a>. SGML Declaration for HTML</span>
+
+ This is the SGML Declaration for HyperText Markup Language.
+
+&lt;!SGML "ISO 8879:1986"
+--
+ SGML Declaration for HyperText Markup Language (HTML).
+
+--
+
+CHARSET
+ BASESET "ISO 646:1983//CHARSET
+ International Reference Version
+ (IRV)//ESC 2/5 4/0"
+ DESCSET 0 9 UNUSED
+ 9 2 9
+ 11 2 UNUSED
+ 13 1 13
+ 14 18 UNUSED
+ 32 95 32
+ 127 1 UNUSED
+ BASESET "ISO Registration Number 100//CHARSET
+ ECMA-94 Right Part of
+ Latin Alphabet Nr. 1//ESC 2/13 4/1"
+
+ DESCSET 128 32 UNUSED
+ 160 96 32
+
+CAPACITY SGMLREF
+ TOTALCAP 150000
+ GRPCAP 150000
+ ENTCAP 150000
+
+SCOPE DOCUMENT
+SYNTAX
+ SHUNCHAR CONTROLS 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
+ 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 127
+ BASESET "ISO 646:1983//CHARSET
+ International Reference Version
+ (IRV)//ESC 2/5 4/0"
+ DESCSET 0 128 0
+ FUNCTION
+ RE 13
+ RS 10
+ SPACE 32
+ TAB SEPCHAR 9
+ NAMING LCNMSTRT ""
+ UCNMSTRT ""
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 64]</span>
+<a name="page-65" id="page-65" href="#page-65" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ LCNMCHAR ".-"
+ UCNMCHAR ".-"
+ NAMECASE GENERAL YES
+ ENTITY NO
+ DELIM GENERAL SGMLREF
+ SHORTREF SGMLREF
+ NAMES SGMLREF
+ QUANTITY SGMLREF
+ ATTSPLEN 2100
+ LITLEN 1024
+ NAMELEN 72 -- somewhat arbitrary; taken from
+ internet line length conventions --
+ PILEN 1024
+ TAGLVL 100
+ TAGLEN 2100
+ GRPGTCNT 150
+ GRPCNT 64
+
+FEATURES
+ MINIMIZE
+ DATATAG NO
+ OMITTAG YES
+ RANK NO
+ SHORTTAG YES
+ LINK
+ SIMPLE NO
+ IMPLICIT NO
+ EXPLICIT NO
+ OTHER
+ CONCUR NO
+ SUBDOC NO
+ FORMAL YES
+ APPINFO "SDA" -- conforming SGML Document Access application
+ --
+&gt;
+&lt;!--
+ $Id: html.decl,v 1.17 1995/06/08 14:59:32 connolly Exp $
+
+ Author: Daniel W. Connolly &lt;connolly@w3.org&gt;
+
+ See also: <a href="http://www.w3.org/hypertext/WWW/MarkUp/MarkUp.html">http://www.w3.org/hypertext/WWW/MarkUp/MarkUp.html</a>
+ --&gt;
+
+<span class="h3"><a name="section-9.6">9.6</a>. Sample SGML Open Entity Catalog for HTML</span>
+
+ The SGML standard describes an "entity manager" as the portion or
+ component of an SGML system that maps SGML entities into the actual
+ storage model (e.g., the file system). The standard itself does not
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 65]</span>
+<a name="page-66" id="page-66" href="#page-66" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ define a particular mapping methodology or notation.
+
+ To assist the interoperability among various SGML tools and systems,
+ the SGML Open consortium has passed a technical resolution that
+ defines a format for an application-independent entity catalog that
+ maps external identifiers and/or entity names to file names.
+
+ Each entry in the catalog associates a storage object identifier
+ (such as a file name) with information about the external entity that
+ appears in the SGML document. In addition to entries that associate
+ public identifiers, a catalog entry can associate an entity name with
+ a storage object identifier. For example, the following are possible
+ catalog entries:
+
+ -- catalog: SGML Open style entity catalog for HTML --
+ -- $Id: catalog,v 1.3 1995/09/21 23:30:23 connolly Exp $ --
+
+ -- Ways to refer to Level 2: most general to most specific --
+PUBLIC "-//IETF//DTD HTML//EN" html.dtd
+PUBLIC "-//IETF//DTD HTML 2.0//EN" html.dtd
+PUBLIC "-//IETF//DTD HTML Level 2//EN" html.dtd
+PUBLIC "-//IETF//DTD HTML 2.0 Level 2//EN" html.dtd
+
+ -- Ways to refer to Level 1: most general to most specific --
+PUBLIC "-//IETF//DTD HTML Level 1//EN" html-1.dtd
+PUBLIC "-//IETF//DTD HTML 2.0 Level 1//EN" html-1.dtd
+
+ -- Ways to refer to
+ Strict Level 2: most general to most specific --
+PUBLIC "-//IETF//DTD HTML Strict//EN" html-s.dtd
+PUBLIC "-//IETF//DTD HTML 2.0 Strict//EN" html-s.dtd
+PUBLIC "-//IETF//DTD HTML Strict Level 2//EN" html-s.dtd
+PUBLIC "-//IETF//DTD HTML 2.0 Strict Level 2//EN" html-s.dtd
+
+ -- Ways to refer to
+ Strict Level 1: most general to most specific --
+PUBLIC "-//IETF//DTD HTML Strict Level 1//EN" html-1s.dtd
+PUBLIC "-//IETF//DTD HTML 2.0 Strict Level 1//EN" html-1s.dtd
+
+ -- ISO latin 1 entity set for HTML --
+PUBLIC "ISO 8879-1986//ENTITIES Added Latin 1//EN//HTML" ISOlat1\
+sgml
+
+<span class="h3"><a name="section-9.7">9.7</a>. Character Entity Sets</span>
+
+ The HTML DTD defines the following entities. They represent
+ particular graphic characters which have special meanings in places
+ in the markup, or may not be part of the character set available to
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 66]</span>
+<a name="page-67" id="page-67" href="#page-67" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ the writer.
+
+<span class="h4"><a name="section-9.7.1">9.7.1</a>. Numeric and Special Graphic Entity Set</span>
+
+ The following table lists each of the characters included from the
+ Numeric and Special Graphic entity set, along with its name, syntax
+ for use, and description. This list is derived from `ISO Standard
+ 8879:1986//ENTITIES Numeric and Special Graphic//EN'. However, HTML
+ does not include the entire entity set -- only the entities
+ listed below are included.
+
+ GLYPH NAME SYNTAX DESCRIPTION
+ &lt; lt &amp;lt; Less than sign
+ &gt; gt &amp;gt; Greater than sign
+ &amp; amp &amp;amp; Ampersand
+ " quot &amp;quot; Double quote sign
+
+<span class="h4"><a name="section-9.7.2">9.7.2</a>. ISO Latin 1 Character Entity Set</span>
+
+ The following public text lists each of the characters specified in
+ the Added Latin 1 entity set, along with its name, syntax for use,
+ and description. This list is derived from ISO Standard
+ 8879:1986//ENTITIES Added Latin 1//EN. HTML includes the entire
+ entity set.
+
+&lt;!-- (C) International Organization for Standardization 1986
+ Permission to copy in any form is granted for use with
+ conforming SGML systems and applications as defined in
+ ISO 8879, provided this notice is included in all copies.
+--&gt;
+&lt;!-- Character entity set. Typical invocation:
+ &lt;!ENTITY % ISOlat1 PUBLIC
+ "ISO 8879-1986//ENTITIES Added Latin 1//EN//HTML"&gt;
+ %ISOlat1;
+--&gt;
+&lt;!-- Modified for use in HTML
+ $Id: ISOlat1.sgml,v 1.2 1994/11/30 23:45:12 connolly Exp $ --&gt;
+&lt;!ENTITY AElig CDATA "&amp;#198;" -- capital AE diphthong (ligature) --&gt;
+&lt;!ENTITY Aacute CDATA "&amp;#193;" -- capital A, acute accent --&gt;
+&lt;!ENTITY Acirc CDATA "&amp;#194;" -- capital A, circumflex accent --&gt;
+&lt;!ENTITY Agrave CDATA "&amp;#192;" -- capital A, grave accent --&gt;
+&lt;!ENTITY Aring CDATA "&amp;#197;" -- capital A, ring --&gt;
+&lt;!ENTITY Atilde CDATA "&amp;#195;" -- capital A, tilde --&gt;
+&lt;!ENTITY Auml CDATA "&amp;#196;" -- capital A, dieresis or umlaut mark --&gt;
+&lt;!ENTITY Ccedil CDATA "&amp;#199;" -- capital C, cedilla --&gt;
+&lt;!ENTITY ETH CDATA "&amp;#208;" -- capital Eth, Icelandic --&gt;
+&lt;!ENTITY Eacute CDATA "&amp;#201;" -- capital E, acute accent --&gt;
+&lt;!ENTITY Ecirc CDATA "&amp;#202;" -- capital E, circumflex accent --&gt;
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 67]</span>
+<a name="page-68" id="page-68" href="#page-68" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+&lt;!ENTITY Egrave CDATA "&amp;#200;" -- capital E, grave accent --&gt;
+&lt;!ENTITY Euml CDATA "&amp;#203;" -- capital E, dieresis or umlaut mark --&gt;
+&lt;!ENTITY Iacute CDATA "&amp;#205;" -- capital I, acute accent --&gt;
+&lt;!ENTITY Icirc CDATA "&amp;#206;" -- capital I, circumflex accent --&gt;
+&lt;!ENTITY Igrave CDATA "&amp;#204;" -- capital I, grave accent --&gt;
+&lt;!ENTITY Iuml CDATA "&amp;#207;" -- capital I, dieresis or umlaut mark --&gt;
+&lt;!ENTITY Ntilde CDATA "&amp;#209;" -- capital N, tilde --&gt;
+&lt;!ENTITY Oacute CDATA "&amp;#211;" -- capital O, acute accent --&gt;
+&lt;!ENTITY Ocirc CDATA "&amp;#212;" -- capital O, circumflex accent --&gt;
+&lt;!ENTITY Ograve CDATA "&amp;#210;" -- capital O, grave accent --&gt;
+&lt;!ENTITY Oslash CDATA "&amp;#216;" -- capital O, slash --&gt;
+&lt;!ENTITY Otilde CDATA "&amp;#213;" -- capital O, tilde --&gt;
+&lt;!ENTITY Ouml CDATA "&amp;#214;" -- capital O, dieresis or umlaut mark --&gt;
+&lt;!ENTITY THORN CDATA "&amp;#222;" -- capital THORN, Icelandic --&gt;
+&lt;!ENTITY Uacute CDATA "&amp;#218;" -- capital U, acute accent --&gt;
+&lt;!ENTITY Ucirc CDATA "&amp;#219;" -- capital U, circumflex accent --&gt;
+&lt;!ENTITY Ugrave CDATA "&amp;#217;" -- capital U, grave accent --&gt;
+&lt;!ENTITY Uuml CDATA "&amp;#220;" -- capital U, dieresis or umlaut mark --&gt;
+&lt;!ENTITY Yacute CDATA "&amp;#221;" -- capital Y, acute accent --&gt;
+&lt;!ENTITY aacute CDATA "&amp;#225;" -- small a, acute accent --&gt;
+&lt;!ENTITY acirc CDATA "&amp;#226;" -- small a, circumflex accent --&gt;
+&lt;!ENTITY aelig CDATA "&amp;#230;" -- small ae diphthong (ligature) --&gt;
+&lt;!ENTITY agrave CDATA "&amp;#224;" -- small a, grave accent --&gt;
+&lt;!ENTITY aring CDATA "&amp;#229;" -- small a, ring --&gt;
+&lt;!ENTITY atilde CDATA "&amp;#227;" -- small a, tilde --&gt;
+&lt;!ENTITY auml CDATA "&amp;#228;" -- small a, dieresis or umlaut mark --&gt;
+&lt;!ENTITY ccedil CDATA "&amp;#231;" -- small c, cedilla --&gt;
+&lt;!ENTITY eacute CDATA "&amp;#233;" -- small e, acute accent --&gt;
+&lt;!ENTITY ecirc CDATA "&amp;#234;" -- small e, circumflex accent --&gt;
+&lt;!ENTITY egrave CDATA "&amp;#232;" -- small e, grave accent --&gt;
+&lt;!ENTITY eth CDATA "&amp;#240;" -- small eth, Icelandic --&gt;
+&lt;!ENTITY euml CDATA "&amp;#235;" -- small e, dieresis or umlaut mark --&gt;
+&lt;!ENTITY iacute CDATA "&amp;#237;" -- small i, acute accent --&gt;
+&lt;!ENTITY icirc CDATA "&amp;#238;" -- small i, circumflex accent --&gt;
+&lt;!ENTITY igrave CDATA "&amp;#236;" -- small i, grave accent --&gt;
+&lt;!ENTITY iuml CDATA "&amp;#239;" -- small i, dieresis or umlaut mark --&gt;
+&lt;!ENTITY ntilde CDATA "&amp;#241;" -- small n, tilde --&gt;
+&lt;!ENTITY oacute CDATA "&amp;#243;" -- small o, acute accent --&gt;
+&lt;!ENTITY ocirc CDATA "&amp;#244;" -- small o, circumflex accent --&gt;
+&lt;!ENTITY ograve CDATA "&amp;#242;" -- small o, grave accent --&gt;
+&lt;!ENTITY oslash CDATA "&amp;#248;" -- small o, slash --&gt;
+&lt;!ENTITY otilde CDATA "&amp;#245;" -- small o, tilde --&gt;
+&lt;!ENTITY ouml CDATA "&amp;#246;" -- small o, dieresis or umlaut mark --&gt;
+&lt;!ENTITY szlig CDATA "&amp;#223;" -- small sharp s, German (sz ligature) --&gt;
+&lt;!ENTITY thorn CDATA "&amp;#254;" -- small thorn, Icelandic --&gt;
+&lt;!ENTITY uacute CDATA "&amp;#250;" -- small u, acute accent --&gt;
+&lt;!ENTITY ucirc CDATA "&amp;#251;" -- small u, circumflex accent --&gt;
+&lt;!ENTITY ugrave CDATA "&amp;#249;" -- small u, grave accent --&gt;
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 68]</span>
+<a name="page-69" id="page-69" href="#page-69" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+&lt;!ENTITY uuml CDATA "&amp;#252;" -- small u, dieresis or umlaut mark --&gt;
+&lt;!ENTITY yacute CDATA "&amp;#253;" -- small y, acute accent --&gt;
+&lt;!ENTITY yuml CDATA "&amp;#255;" -- small y, dieresis or umlaut mark --&gt;
+
+<span class="h2"><a name="section-10">10</a>. Security Considerations</span>
+
+ Anchors, embedded images, and all other elements which contain URIs
+ as parameters may cause the URI to be dereferenced in response to
+ user input. In this case, the security considerations of [<a href="#ref-URL" title='"Uniform Resource Locators (URL)"'>URL</a>] apply.
+
+ The widely deployed methods for submitting forms requests -- HTTP and
+ SMTP -- provide little assurance of confidentiality. Information
+ providers who request sensitive information via forms -- especially
+ by way of the `PASSWORD' type input field (see 8.1.2, "Input Field:
+ INPUT") -- should be aware and make their users aware of the lack of
+ confidentiality.
+
+<span class="h2"><a name="section-11">11</a>. References</span>
+
+ [<a name="ref-URI" id="ref-URI">URI</a>]
+ Berners-Lee, T., "Universal Resource Identifiers in WWW:
+ A Unifying Syntax for the Expression of Names and
+ Addresses of Objects on the Network as used in the
+ World-Wide Web", <a href="./rfc1630">RFC 1630</a>, CERN, June 1994.
+ &lt;URL:ftp://ds.internic.net/rfc/rfc1630.txt&gt;
+
+ [<a name="ref-URL" id="ref-URL">URL</a>]
+ Berners-Lee, T., Masinter, L., and M. McCahill, "Uniform
+ Resource Locators (URL)", <a href="./rfc1738">RFC 1738</a>, CERN, Xerox PARC,
+ University of Minnesota, December 1994.
+ &lt;URL:ftp://ds.internic.net/rfc/rfc1738.txt&gt;
+
+ [<a name="ref-HTTP" id="ref-HTTP">HTTP</a>]
+ Berners-Lee, T., Fielding, R., and H. Frystyk Nielsen,
+ "Hypertext Transfer Protocol - HTTP/1.0", Work in
+ Progress, MIT, UC Irvine, CERN, March 1995.
+
+ [<a name="ref-MIME" id="ref-MIME">MIME</a>]
+ Borenstein, N., and N. Freed. "MIME (Multipurpose
+ Internet Mail Extensions) Part One: Mechanisms for
+ Specifying and Describing the Format of Internet Message
+ Bodies", <a href="./rfc1521">RFC 1521</a>, Bellcore, Innosoft, September 1993.
+ &lt;URL:ftp://ds.internic.net/rfc/rfc1521.txt&gt;
+
+ [<a name="ref-RELURL" id="ref-RELURL">RELURL</a>]
+ Fielding, R., "Relative Uniform Resource Locators", <a href="./rfc1808">RFC</a>
+ <a href="./rfc1808">1808</a>, June 1995.
+ &lt;URL:ftp://ds.internic.net/rfc/rfc1808.txt&gt;
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 69]</span>
+<a name="page-70" id="page-70" href="#page-70" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ [<a name="ref-GOLD90" id="ref-GOLD90">GOLD90</a>]
+ Goldfarb, C., "The SGML Handbook", Y. Rubinsky, Ed.,
+ Oxford University Press, 1990.
+
+ [<a name="ref-DEXTER" id="ref-DEXTER">DEXTER</a>]
+ Frank Halasz and Mayer Schwartz, "The Dexter Hypertext
+ Reference Model", Communications of the ACM, pp.
+ 30-39, vol. 37 no. 2, Feb 1994.
+
+ [<a name="ref-IMEDIA" id="ref-IMEDIA">IMEDIA</a>]
+ Postel, J., "Media Type Registration Procedure",
+ <a href="./rfc1590">RFC 1590</a>, USC/Information Sciences Institute, March 1994.
+ &lt;URL:ftp://ds.internic.net/rfc/rfc1590.txt&gt;
+
+ [<a name="ref-IANA" id="ref-IANA">IANA</a>]
+ Reynolds, J., and J. Postel, "Assigned Numbers", STD 2,
+ <a href="./rfc1700">RFC 1700</a>, USC/Information Sciences Institute, October
+ 1994. &lt;URL:ftp://ds.internic.net/rfc/rfc1700.txt&gt;
+
+ [<a name="ref-SQ91" id="ref-SQ91">SQ91</a>]
+ SoftQuad. "The SGML Primer", 3rd ed., SoftQuad Inc.,
+ 1991. &lt;URL:http://www.sq.com/&gt;
+
+ [<a name="ref-ISO-646" id="ref-ISO-646">ISO-646</a>]
+ ISO/IEC 646:1991 Information technology -- ISO 7-bit
+ coded character set for information interchange
+ &lt;URL:http://www.iso.ch/cate/d4777.html&gt;
+
+ [<a name="ref-ISO-10646" id="ref-ISO-10646">ISO-10646</a>]
+ ISO/IEC 10646-1:1993 Information technology -- Universal
+ Multiple-Octet Coded Character Set (UCS) -- Part 1:
+ Architecture and Basic Multilingual Plane
+ &lt;URL:http://www.iso.ch/cate/d18741.html&gt;
+
+ [<a name="ref-ISO-8859-1" id="ref-ISO-8859-1">ISO-8859-1</a>]
+ ISO 8859. International Standard -- Information
+ Processing -- 8-bit Single-Byte Coded Graphic Character
+ Sets -- Part 1: Latin Alphabet No. 1, ISO 8859-1:1987.
+ &lt;URL:http://www.iso.ch/cate/d16338.html&gt;
+
+ [<a name="ref-SGML" id="ref-SGML">SGML</a>]
+ ISO 8879. Information Processing -- Text and Office
+ Systems - Standard Generalized Markup Language (SGML),
+ 1986. &lt;URL:http://www.iso.ch/cate/d16387.html&gt;
+
+
+
+
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 70]</span>
+<a name="page-71" id="page-71" href="#page-71" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+<span class="h2"><a name="section-12">12</a>. Acknowledgments</span>
+
+ The HTML document type was designed by Tim Berners-Lee at CERN as
+ part of the 1990 World Wide Web project. In 1992, Dan Connolly wrote
+ the HTML Document Type Definition (DTD) and a brief HTML
+ specification.
+
+ Since 1993, a wide variety of Internet participants have contributed
+ to the evolution of HTML, which has included the addition of in-line
+ images introduced by the NCSA Mosaic software for WWW. Dave Raggett
+ played an important role in deriving the forms material from the
+ HTML+ specification.
+
+ Dan Connolly and Karen Olson Muldrow rewrote the HTML Specification
+ in 1994. The document was then edited by the HTML working group as a
+ whole, with updates being made by Eric Schieler, Mike Knezovich, and
+ Eric W. Sink at Spyglass, Inc. Finally, Roy Fielding restructured
+ the entire draft into its current form.
+
+ Special thanks to the many active participants in the HTML working
+ group, too numerous to list individually, without whom there would be
+ no standards process and no standard. That this document approaches
+ its objective of carefully converging a description of current
+ practice and formalization of HTML's relationship to SGML is a
+ tribute to their effort.
+
+<span class="h3"><a name="section-12.1">12.1</a>. Authors' Addresses</span>
+
+ Tim Berners-Lee
+ Director, W3 Consortium
+ MIT Laboratory for Computer Science
+ 545 Technology Square
+ Cambridge, MA 02139, U.S.A.
+
+ Phone: +1 (617) 253 9670
+ Fax: +1 (617) 258 8682
+ EMail: timbl@w3.org
+
+
+ Daniel W. Connolly
+ Research Technical Staff, W3 Consortium
+ MIT Laboratory for Computer Science
+ 545 Technology Square
+ Cambridge, MA 02139, U.S.A.
+
+ Phone: +1 (617) 258 8682
+ EMail: connolly@w3.org
+ URI: <a href="http://www.w3.org/hypertext/WWW/People/Connolly/">http://www.w3.org/hypertext/WWW/People/Connolly/</a>
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 71]</span>
+<a name="page-72" id="page-72" href="#page-72" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+<span class="h2"><a name="section-13">13</a>. The HTML Coded Character Set</span>
+
+ This list details the code positions and characters of the HTML
+ document character set, specified in 9.5, "SGML Declaration for
+ HTML". This coded character set is based on [<a href="#ref-ISO-8859-1">ISO-8859-1</a>].
+
+ REFERENCE DESCRIPTION
+ -------------- -----------
+ &amp;#00; - &amp;#08; Unused
+ &amp;#09; Horizontal tab
+ &amp;#10; Line feed
+ &amp;#11; - &amp;#12; Unused
+ &amp;#13; Carriage Return
+ &amp;#14; - &amp;#31; Unused
+ &amp;#32; Space
+ &amp;#33; Exclamation mark
+ &amp;#34; Quotation mark
+ &amp;#35; Number sign
+ &amp;#36; Dollar sign
+ &amp;#37; Percent sign
+ &amp;#38; Ampersand
+ &amp;#39; Apostrophe
+ &amp;#40; Left parenthesis
+ &amp;#41; Right parenthesis
+ &amp;#42; Asterisk
+ &amp;#43; Plus sign
+ &amp;#44; Comma
+ &amp;#45; Hyphen
+ &amp;#46; Period (fullstop)
+ &amp;#47; Solidus (slash)
+ &amp;#48; - &amp;#57; Digits 0-9
+ &amp;#58; Colon
+ &amp;#59; Semi-colon
+ &amp;#60; Less than
+ &amp;#61; Equals sign
+ &amp;#62; Greater than
+ &amp;#63; Question mark
+ &amp;#64; Commercial at
+ &amp;#65; - &amp;#90; Letters A-Z
+ &amp;#91; Left square bracket
+ &amp;#92; Reverse solidus (backslash)
+ &amp;#93; Right square bracket
+ &amp;#94; Caret
+ &amp;#95; Horizontal bar (underscore)
+ &amp;#96; Grave accent
+ &amp;#97; - &amp;#122; Letters a-z
+ &amp;#123; Left curly brace
+ &amp;#124; Vertical bar
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 72]</span>
+<a name="page-73" id="page-73" href="#page-73" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ &amp;#125; Right curly brace
+ &amp;#126; Tilde
+ &amp;#127; - &amp;#159; Unused
+ &amp;#160; Non-breaking Space
+ &amp;#161; Inverted exclamation
+ &amp;#162; Cent sign
+ &amp;#163; Pound sterling
+ &amp;#164; General currency sign
+ &amp;#165; Yen sign
+ &amp;#166; Broken vertical bar
+ &amp;#167; Section sign
+ &amp;#168; Umlaut (dieresis)
+ &amp;#169; Copyright
+ &amp;#170; Feminine ordinal
+ &amp;#171; Left angle quote, guillemotleft
+ &amp;#172; Not sign
+ &amp;#173; Soft hyphen
+ &amp;#174; Registered trademark
+ &amp;#175; Macron accent
+ &amp;#176; Degree sign
+ &amp;#177; Plus or minus
+ &amp;#178; Superscript two
+ &amp;#179; Superscript three
+ &amp;#180; Acute accent
+ &amp;#181; Micro sign
+ &amp;#182; Paragraph sign
+ &amp;#183; Middle dot
+ &amp;#184; Cedilla
+ &amp;#185; Superscript one
+ &amp;#186; Masculine ordinal
+ &amp;#187; Right angle quote, guillemotright
+ &amp;#188; Fraction one-fourth
+ &amp;#189; Fraction one-half
+ &amp;#190; Fraction three-fourths
+ &amp;#191; Inverted question mark
+ &amp;#192; Capital A, grave accent
+ &amp;#193; Capital A, acute accent
+ &amp;#194; Capital A, circumflex accent
+ &amp;#195; Capital A, tilde
+ &amp;#196; Capital A, dieresis or umlaut mark
+ &amp;#197; Capital A, ring
+ &amp;#198; Capital AE diphthong (ligature)
+ &amp;#199; Capital C, cedilla
+ &amp;#200; Capital E, grave accent
+ &amp;#201; Capital E, acute accent
+ &amp;#202; Capital E, circumflex accent
+ &amp;#203; Capital E, dieresis or umlaut mark
+ &amp;#204; Capital I, grave accent
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 73]</span>
+<a name="page-74" id="page-74" href="#page-74" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ &amp;#205; Capital I, acute accent
+ &amp;#206; Capital I, circumflex accent
+ &amp;#207; Capital I, dieresis or umlaut mark
+ &amp;#208; Capital Eth, Icelandic
+ &amp;#209; Capital N, tilde
+ &amp;#210; Capital O, grave accent
+ &amp;#211; Capital O, acute accent
+ &amp;#212; Capital O, circumflex accent
+ &amp;#213; Capital O, tilde
+ &amp;#214; Capital O, dieresis or umlaut mark
+ &amp;#215; Multiply sign
+ &amp;#216; Capital O, slash
+ &amp;#217; Capital U, grave accent
+ &amp;#218; Capital U, acute accent
+ &amp;#219; Capital U, circumflex accent
+ &amp;#220; Capital U, dieresis or umlaut mark
+ &amp;#221; Capital Y, acute accent
+ &amp;#222; Capital THORN, Icelandic
+ &amp;#223; Small sharp s, German (sz ligature)
+ &amp;#224; Small a, grave accent
+ &amp;#225; Small a, acute accent
+ &amp;#226; Small a, circumflex accent
+ &amp;#227; Small a, tilde
+ &amp;#228; Small a, dieresis or umlaut mark
+ &amp;#229; Small a, ring
+ &amp;#230; Small ae diphthong (ligature)
+ &amp;#231; Small c, cedilla
+ &amp;#232; Small e, grave accent
+ &amp;#233; Small e, acute accent
+ &amp;#234; Small e, circumflex accent
+ &amp;#235; Small e, dieresis or umlaut mark
+ &amp;#236; Small i, grave accent
+ &amp;#237; Small i, acute accent
+ &amp;#238; Small i, circumflex accent
+ &amp;#239; Small i, dieresis or umlaut mark
+ &amp;#240; Small eth, Icelandic
+ &amp;#241; Small n, tilde
+ &amp;#242; Small o, grave accent
+ &amp;#243; Small o, acute accent
+ &amp;#244; Small o, circumflex accent
+ &amp;#245; Small o, tilde
+ &amp;#246; Small o, dieresis or umlaut mark
+ &amp;#247; Division sign
+ &amp;#248; Small o, slash
+ &amp;#249; Small u, grave accent
+ &amp;#250; Small u, acute accent
+ &amp;#251; Small u, circumflex accent
+ &amp;#252; Small u, dieresis or umlaut mark
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 74]</span>
+<a name="page-75" id="page-75" href="#page-75" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ &amp;#253; Small y, acute accent
+ &amp;#254; Small thorn, Icelandic
+ &amp;#255; Small y, dieresis or umlaut mark
+
+<span class="h2"><a name="section-14">14</a>. Proposed Entities</span>
+
+ The HTML DTD references the "Added Latin 1" entity set, which only
+ supplies named entities for a subset of the non-ASCII characters in
+ [<a href="#ref-ISO-8859-1">ISO-8859-1</a>], namely the accented characters. The following entities
+ should be supported so that all ISO 8859-1 characters may be
+ referenced symbolically. The names for these entities are taken from
+ the appendixes of [<a href="#ref-SGML">SGML</a>].
+
+ &lt;!ENTITY nbsp CDATA "&amp;#160;" -- no-break space --&gt;
+ &lt;!ENTITY iexcl CDATA "&amp;#161;" -- inverted exclamation mark --&gt;
+ &lt;!ENTITY cent CDATA "&amp;#162;" -- cent sign --&gt;
+ &lt;!ENTITY pound CDATA "&amp;#163;" -- pound sterling sign --&gt;
+ &lt;!ENTITY curren CDATA "&amp;#164;" -- general currency sign --&gt;
+ &lt;!ENTITY yen CDATA "&amp;#165;" -- yen sign --&gt;
+ &lt;!ENTITY brvbar CDATA "&amp;#166;" -- broken (vertical) bar --&gt;
+ &lt;!ENTITY sect CDATA "&amp;#167;" -- section sign --&gt;
+ &lt;!ENTITY uml CDATA "&amp;#168;" -- umlaut (dieresis) --&gt;
+ &lt;!ENTITY copy CDATA "&amp;#169;" -- copyright sign --&gt;
+ &lt;!ENTITY ordf CDATA "&amp;#170;" -- ordinal indicator, feminine --&gt;
+ &lt;!ENTITY laquo CDATA "&amp;#171;" -- angle quotation mark, left --&gt;
+ &lt;!ENTITY not CDATA "&amp;#172;" -- not sign --&gt;
+ &lt;!ENTITY shy CDATA "&amp;#173;" -- soft hyphen --&gt;
+ &lt;!ENTITY reg CDATA "&amp;#174;" -- registered sign --&gt;
+ &lt;!ENTITY macr CDATA "&amp;#175;" -- macron --&gt;
+ &lt;!ENTITY deg CDATA "&amp;#176;" -- degree sign --&gt;
+ &lt;!ENTITY plusmn CDATA "&amp;#177;" -- plus-or-minus sign --&gt;
+ &lt;!ENTITY sup2 CDATA "&amp;#178;" -- superscript two --&gt;
+ &lt;!ENTITY sup3 CDATA "&amp;#179;" -- superscript three --&gt;
+ &lt;!ENTITY acute CDATA "&amp;#180;" -- acute accent --&gt;
+ &lt;!ENTITY micro CDATA "&amp;#181;" -- micro sign --&gt;
+ &lt;!ENTITY para CDATA "&amp;#182;" -- pilcrow (paragraph sign) --&gt;
+ &lt;!ENTITY middot CDATA "&amp;#183;" -- middle dot --&gt;
+ &lt;!ENTITY cedil CDATA "&amp;#184;" -- cedilla --&gt;
+ &lt;!ENTITY sup1 CDATA "&amp;#185;" -- superscript one --&gt;
+ &lt;!ENTITY ordm CDATA "&amp;#186;" -- ordinal indicator, masculine --&gt;
+ &lt;!ENTITY raquo CDATA "&amp;#187;" -- angle quotation mark, right --&gt;
+ &lt;!ENTITY frac14 CDATA "&amp;#188;" -- fraction one-quarter --&gt;
+ &lt;!ENTITY frac12 CDATA "&amp;#189;" -- fraction one-half --&gt;
+ &lt;!ENTITY frac34 CDATA "&amp;#190;" -- fraction three-quarters --&gt;
+ &lt;!ENTITY iquest CDATA "&amp;#191;" -- inverted question mark --&gt;
+ &lt;!ENTITY Agrave CDATA "&amp;#192;" -- capital A, grave accent --&gt;
+ &lt;!ENTITY Aacute CDATA "&amp;#193;" -- capital A, acute accent --&gt;
+ &lt;!ENTITY Acirc CDATA "&amp;#194;" -- capital A, circumflex accent --&gt;
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 75]</span>
+<a name="page-76" id="page-76" href="#page-76" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ &lt;!ENTITY Atilde CDATA "&amp;#195;" -- capital A, tilde --&gt;
+ &lt;!ENTITY Auml CDATA "&amp;#196;" -- capital A, dieresis or umlaut mark --&gt;
+ &lt;!ENTITY Aring CDATA "&amp;#197;" -- capital A, ring --&gt;
+ &lt;!ENTITY AElig CDATA "&amp;#198;" -- capital AE diphthong (ligature) --&gt;
+ &lt;!ENTITY Ccedil CDATA "&amp;#199;" -- capital C, cedilla --&gt;
+ &lt;!ENTITY Egrave CDATA "&amp;#200;" -- capital E, grave accent --&gt;
+ &lt;!ENTITY Eacute CDATA "&amp;#201;" -- capital E, acute accent --&gt;
+ &lt;!ENTITY Ecirc CDATA "&amp;#202;" -- capital E, circumflex accent --&gt;
+ &lt;!ENTITY Euml CDATA "&amp;#203;" -- capital E, dieresis or umlaut mark --&gt;
+ &lt;!ENTITY Igrave CDATA "&amp;#204;" -- capital I, grave accent --&gt;
+ &lt;!ENTITY Iacute CDATA "&amp;#205;" -- capital I, acute accent --&gt;
+ &lt;!ENTITY Icirc CDATA "&amp;#206;" -- capital I, circumflex accent --&gt;
+ &lt;!ENTITY Iuml CDATA "&amp;#207;" -- capital I, dieresis or umlaut mark --&gt;
+ &lt;!ENTITY ETH CDATA "&amp;#208;" -- capital Eth, Icelandic --&gt;
+ &lt;!ENTITY Ntilde CDATA "&amp;#209;" -- capital N, tilde --&gt;
+ &lt;!ENTITY Ograve CDATA "&amp;#210;" -- capital O, grave accent --&gt;
+ &lt;!ENTITY Oacute CDATA "&amp;#211;" -- capital O, acute accent --&gt;
+ &lt;!ENTITY Ocirc CDATA "&amp;#212;" -- capital O, circumflex accent --&gt;
+ &lt;!ENTITY Otilde CDATA "&amp;#213;" -- capital O, tilde --&gt;
+ &lt;!ENTITY Ouml CDATA "&amp;#214;" -- capital O, dieresis or umlaut mark --&gt;
+ &lt;!ENTITY times CDATA "&amp;#215;" -- multiply sign --&gt;
+ &lt;!ENTITY Oslash CDATA "&amp;#216;" -- capital O, slash --&gt;
+ &lt;!ENTITY Ugrave CDATA "&amp;#217;" -- capital U, grave accent --&gt;
+ &lt;!ENTITY Uacute CDATA "&amp;#218;" -- capital U, acute accent --&gt;
+ &lt;!ENTITY Ucirc CDATA "&amp;#219;" -- capital U, circumflex accent --&gt;
+ &lt;!ENTITY Uuml CDATA "&amp;#220;" -- capital U, dieresis or umlaut mark --&gt;
+ &lt;!ENTITY Yacute CDATA "&amp;#221;" -- capital Y, acute accent --&gt;
+ &lt;!ENTITY THORN CDATA "&amp;#222;" -- capital THORN, Icelandic --&gt;
+ &lt;!ENTITY szlig CDATA "&amp;#223;" -- small sharp s, German (sz ligature) --&gt;
+ &lt;!ENTITY agrave CDATA "&amp;#224;" -- small a, grave accent --&gt;
+ &lt;!ENTITY aacute CDATA "&amp;#225;" -- small a, acute accent --&gt;
+ &lt;!ENTITY acirc CDATA "&amp;#226;" -- small a, circumflex accent --&gt;
+ &lt;!ENTITY atilde CDATA "&amp;#227;" -- small a, tilde --&gt;
+ &lt;!ENTITY auml CDATA "&amp;#228;" -- small a, dieresis or umlaut mark --&gt;
+ &lt;!ENTITY aring CDATA "&amp;#229;" -- small a, ring --&gt;
+ &lt;!ENTITY aelig CDATA "&amp;#230;" -- small ae diphthong (ligature) --&gt;
+ &lt;!ENTITY ccedil CDATA "&amp;#231;" -- small c, cedilla --&gt;
+ &lt;!ENTITY egrave CDATA "&amp;#232;" -- small e, grave accent --&gt;
+ &lt;!ENTITY eacute CDATA "&amp;#233;" -- small e, acute accent --&gt;
+ &lt;!ENTITY ecirc CDATA "&amp;#234;" -- small e, circumflex accent --&gt;
+ &lt;!ENTITY euml CDATA "&amp;#235;" -- small e, dieresis or umlaut mark --&gt;
+ &lt;!ENTITY igrave CDATA "&amp;#236;" -- small i, grave accent --&gt;
+ &lt;!ENTITY iacute CDATA "&amp;#237;" -- small i, acute accent --&gt;
+ &lt;!ENTITY icirc CDATA "&amp;#238;" -- small i, circumflex accent --&gt;
+ &lt;!ENTITY iuml CDATA "&amp;#239;" -- small i, dieresis or umlaut mark --&gt;
+ &lt;!ENTITY eth CDATA "&amp;#240;" -- small eth, Icelandic --&gt;
+ &lt;!ENTITY ntilde CDATA "&amp;#241;" -- small n, tilde --&gt;
+ &lt;!ENTITY ograve CDATA "&amp;#242;" -- small o, grave accent --&gt;
+
+
+
+<span class="grey">Berners-Lee &amp; Connolly Standards Track [Page 76]</span>
+<a name="page-77" id="page-77" href="#page-77" class="invisible"><span class="break"> </span></a>
+<span class="grey"><a href="./rfc1866">RFC 1866</a> Hypertext Markup Language - 2.0 November 1995</span>
+
+
+ &lt;!ENTITY oacute CDATA "&amp;#243;" -- small o, acute accent --&gt;
+ &lt;!ENTITY ocirc CDATA "&amp;#244;" -- small o, circumflex accent --&gt;
+ &lt;!ENTITY otilde CDATA "&amp;#245;" -- small o, tilde --&gt;
+ &lt;!ENTITY ouml CDATA "&amp;#246;" -- small o, dieresis or umlaut mark --&gt;
+ &lt;!ENTITY divide CDATA "&amp;#247;" -- divide sign --&gt;
+ &lt;!ENTITY oslash CDATA "&amp;#248;" -- small o, slash --&gt;
+ &lt;!ENTITY ugrave CDATA "&amp;#249;" -- small u, grave accent --&gt;
+ &lt;!ENTITY uacute CDATA "&amp;#250;" -- small u, acute accent --&gt;
+ &lt;!ENTITY ucirc CDATA "&amp;#251;" -- small u, circumflex accent --&gt;
+ &lt;!ENTITY uuml CDATA "&amp;#252;" -- small u, dieresis or umlaut mark --&gt;
+ &lt;!ENTITY yacute CDATA "&amp;#253;" -- small y, acute accent --&gt;
+ &lt;!ENTITY thorn CDATA "&amp;#254;" -- small thorn, Icelandic --&gt;
+ &lt;!ENTITY yuml CDATA "&amp;#255;" -- small y, dieresis or umlaut mark --&gt;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Berners-Lee &amp; Connolly Standards Track [Page 77]
+<span class="break"> </span>
+
+</pre><br />
+<span class="noprint"><small><small>Html markup produced by rfcmarkup 1.60, available from
+<a href="http://tools.ietf.org/tools/rfcmarkup/">http://tools.ietf.org/tools/rfcmarkup/</a>
+</small></small></span>
+</body></html>
diff --git a/doc/rfc3513.htm b/doc/rfc3513.htm
new file mode 100644
index 0000000..0b8bfb6
--- /dev/null
+++ b/doc/rfc3513.htm
@@ -0,0 +1,1579 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xml:lang="en" lang="en"><head>
+
+
+ <meta http-equiv="Content-Type" content="text/html; charset=us-ascii">
+ <meta name="robots" content="index,follow">
+ <meta name="creator" content="rfcmarkup version 1.46">
+ <link rel="icon" href="http://tools.ietf.org/images/rfc.png" type="image/png">
+ <link rel="shortcut icon" href="http://tools.ietf.org/images/rfc.png" type="image/png"><title>RFC 3513 Internet Protocol Version 6 (IPv6) Addressing Architecture</title>
+
+
+ <style type="text/css">
+ body {
+ margin: 0px 8px;
+ font-size: 1em;
+ }
+ h1, h2, h3, h4, h5, h6, .h1, .h2, .h3, .h4, .h5, .h6 {
+ font-weight: bold;
+ line-height: 0pt;
+ display: inline;
+ white-space: pre;
+ font-family: monospace;
+ font-size: 1em;
+ font-weight: bold;
+ }
+ pre {
+ font-size: 1em;
+ }
+ .pre {
+ white-space: pre;
+ font-family: monospace;
+ }
+ .header{
+ font-weight: bold;
+ }
+ @media print {
+ body {
+ font-size: 10.5pt;
+ }
+ h1, h2, h3, h4, h5, h6 {
+ font-size: 10.5pt;
+ }
+
+ a:link, a:visited {
+ color: inherit;
+ text-decoration: none;
+ }
+ .break {
+ page-break-before: always;
+ text-decoration: none;
+ }
+ .noprint {
+ display: none;
+ }
+ }
+ @media screen {
+ .grey, .grey a:link, .grey a:visited {
+ color: #777;
+ }
+ .break {
+ text-decoration: none;
+ display: none;
+ }
+ .docinfo {
+ background-color: #EEE;
+ }
+ .top {
+ border-top: 2px solid #EEE;
+ }
+ .bgwhite { background-color: white; }
+ .bgred { background-color: #F44; }
+ .bggrey { background-color: #666; }
+ .bgbrown { background-color: #840; }
+ .bgorange { background-color: #FA0; }
+ .bgyellow { background-color: #EE0; }
+ .bgmagenta{ background-color: #F4F; }
+ .bgblue { background-color: #66F; }
+ .bgcyan { background-color: #4DD; }
+ .bggreen { background-color: #4F4; }
+
+ .legend { font-size: 90%; }
+ .cplate { font-size: 70%; border: solid grey 1px; }
+ }
+ </style>
+
+ <script type="text/javascript"><!--
+ function addHeaderTags() {
+ var spans = document.getElementsByTagName("span");
+ for (var i=0; i < spans.length; i++) {
+ var elem = spans[i];
+ if (elem) {
+ var level = elem.getAttribute("class");
+ if (level == "h1" || level == "h2" || level == "h3" || level == "h4" || level == "h5" || level == "h6") {
+ elem.innerHTML = "<"+level+">"+elem.innerHTML+"</"+level+">";
+ }
+ }
+ }
+ }
+ var legend_html = "Colour legend:<br /> <table> <tr><td>Unknown:</td> <td><span class='cplate bgwhite'>&nbsp;&nbsp;&nbsp;&nbsp;</span></td></tr> <tr><td>Draft:</td> <td><span class='cplate bgred'>&nbsp;&nbsp;&nbsp;&nbsp;</span></td></tr> <tr><td>Informational:</td> <td><span class='cplate bgorange'>&nbsp;&nbsp;&nbsp;&nbsp;</span></td></tr> <tr><td>Experimental:</td> <td><span class='cplate bgyellow'>&nbsp;&nbsp;&nbsp;&nbsp;</span></td></tr> <tr><td>Best Common Practice:</td><td><span class='cplate bgmagenta'>&nbsp;&nbsp;&nbsp;&nbsp;</span></td></tr> <tr><td>Proposed Standard:</td><td><span class='cplate bgblue'>&nbsp;&nbsp;&nbsp;&nbsp;</span></td></tr> <tr><td>Draft Standard:</td> <td><span class='cplate bgcyan'>&nbsp;&nbsp;&nbsp;&nbsp;</span></td></tr> <tr><td>Standard:</td> <td><span class='cplate bggreen'>&nbsp;&nbsp;&nbsp;&nbsp;</span></td></tr> <tr><td>Historic:</td> <td><span class='cplate bggrey'>&nbsp;&nbsp;&nbsp;&nbsp;</span></td></tr> <tr><td>Obsolete:</td> <td><span class='cplate bgbrown'>&nbsp;&nbsp;&nbsp;&nbsp;</span></td></tr> </table>";
+ function showElem(id) {
+ var elem = document.getElementById(id);
+ elem.innerHTML = eval(id+"_html");
+ elem.style.visibility='visible';
+ }
+ function hideElem(id) {
+ var elem = document.getElementById(id);
+ elem.style.visibility='hidden';
+ elem.innerHTML = "";
+ }
+ // -->
+ </script></head><body onload="addHeaderTags()">
+ <div style="height: 8px;">
+ <span style="cursor: pointer;" onmouseover="this.style.cursor='pointer';" onclick="showElem('legend');" onmouseout="hideElem('legend')" class="pre noprint docinfo bgbrown" title="Click for colour legend."> </span>
+ <div id="legend" class="docinfo noprint pre legend" style="border: 1px solid rgb(51, 68, 85); padding: 4px 9px 5px 7px; position: absolute; top: 4px; left: 4ex; visibility: hidden; background-color: white;" onmouseover="showElem('legend');" onmouseout="hideElem('legend');"></div>
+ </div>
+<span class="pre noprint docinfo top">[<a href="http://tools.ietf.org/html/">RFCs/IDs</a>] [<a href="http://tools.ietf.org/rfc/rfc3513.txt">Plain Text</a>] [From <a href="http://tools.ietf.org/html/draft-ietf-ipngwg-addr-arch-v3">draft-ietf-ipngwg-addr-arch-v3</a>] </span><br>
+<span class="pre noprint docinfo"> </span><br>
+<span class="pre noprint docinfo">Obsoleted by: <a href="http://tools.ietf.org/html/rfc4291">4291</a> PROPOSED STANDARD</span><br>
+<span class="pre noprint docinfo"> </span><br>
+<pre>Network Working Group R. Hinden
+Request for Comments: 3513 Nokia
+Obsoletes: <a href="http://tools.ietf.org/html/rfc2373">2373</a> S. Deering
+Category: Standards Track Cisco Systems
+ April 2003
+
+
+ <span class="h1"><h1>Internet Protocol Version 6 (IPv6) Addressing Architecture</h1></span>
+
+Status of this Memo
+
+ This document specifies an Internet standards track protocol for the
+ Internet community, and requests discussion and suggestions for
+ improvements. Please refer to the current edition of the "Internet
+ Official Protocol Standards" (STD 1) for the standardization state
+ and status of this protocol. Distribution of this memo is unlimited.
+
+Copyright Notice
+
+ Copyright (C) The Internet Society (2003). All Rights Reserved.
+
+Abstract
+
+ This specification defines the addressing architecture of the IP
+ Version 6 (IPv6) protocol. The document includes the IPv6 addressing
+ model, text representations of IPv6 addresses, definition of IPv6
+ unicast addresses, anycast addresses, and multicast addresses, and an
+ IPv6 node's required addresses.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+<span class="grey">Hinden &amp; Deering Standards Track [Page 1]</span>
+<a name="page-2" id="page-2" href="#page-2"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
+
+
+Table of Contents
+
+ <a href="#section-1">1</a>. Introduction.................................................<a href="#page-3">3</a>
+ <a href="#section-2">2</a>. IPv6 Addressing..............................................<a href="#page-3">3</a>
+ <a href="#section-2.1">2.1</a> Addressing Model.........................................<a href="#page-4">4</a>
+ <a href="#section-2.2">2.2</a> Text Representation of Addresses.........................<a href="#page-4">4</a>
+ <a href="#section-2.3">2.3</a> Text Representation of Address Prefixes..................<a href="#page-5">5</a>
+ <a href="#section-2.4">2.4</a> Address Type Identification..............................<a href="#page-6">6</a>
+ <a href="#section-2.5">2.5</a> Unicast Addresses........................................<a href="#page-7">7</a>
+ <a href="#section-2.5.1">2.5.1</a> Interface Identifiers..............................<a href="#page-8">8</a>
+ <a href="#section-2.5.2">2.5.2</a> The Unspecified Address............................<a href="#page-9">9</a>
+ <a href="#section-2.5.3">2.5.3</a> The Loopback Address...............................<a href="#page-9">9</a>
+ <a href="#section-2.5.4">2.5.4</a> Global Unicast Addresses..........................<a href="#page-10">10</a>
+ <a href="#section-2.5.5">2.5.5</a> IPv6 Addresses with Embedded IPv4 Addresses.......<a href="#page-10">10</a>
+ <a href="#section-2.5.6">2.5.6</a> Local-use IPv6 Unicast Addresses..................<a href="#page-11">11</a>
+ <a href="#section-2.6">2.6</a> Anycast Addresses.......................................<a href="#page-12">12</a>
+ <a href="#section-2.6.1">2.6.1</a> Required Anycast Address..........................<a href="#page-13">13</a>
+ <a href="#section-2.7">2.7</a> Multicast Addresses.....................................<a href="#page-13">13</a>
+ <a href="#section-2.7.1">2.7.1</a> Pre-Defined Multicast Addresses...................<a href="#page-15">15</a>
+ <a href="#section-2.8">2.8</a> A Node's Required Addresses.............................<a href="#page-17">17</a>
+ <a href="#section-3">3</a>. Security Considerations.....................................<a href="#page-17">17</a>
+ <a href="#section-4">4</a>. IANA Considerations.........................................<a href="#page-18">18</a>
+ <a href="#section-5">5</a>. References..................................................<a href="#page-19">19</a>
+ <a href="#section-5.1">5.1</a> Normative References....................................<a href="#page-19">19</a>
+ <a href="#section-5.2">5.2</a> Informative References..................................<a href="#page-19">19</a>
+ APPENDIX A: Creating Modified EUI-64 format Interface IDs......<a href="#page-21">21</a>
+ APPENDIX B: Changes from <a href="http://tools.ietf.org/html/rfc2373">RFC-2373</a>..............................<a href="#page-24">24</a>
+ Authors' Addresses.............................................<a href="#page-25">25</a>
+ Full Copyright Statement.......................................<a href="#page-26">26</a>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+<span class="grey">Hinden &amp; Deering Standards Track [Page 2]</span>
+<a name="page-3" id="page-3" href="#page-3"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
+
+
+<span class="h2"><h2><a name="section-1">1</a>. Introduction</h2></span>
+
+ This specification defines the addressing architecture of the IP
+ Version 6 (IPv6) protocol. It includes the basic formats for the
+ various types of IPv6 addresses (unicast, anycast, and multicast).
+
+ The authors would like to acknowledge the contributions of Paul
+ Francis, Scott Bradner, Jim Bound, Brian Carpenter, Matt Crawford,
+ Deborah Estrin, Roger Fajman, Bob Fink, Peter Ford, Bob Gilligan,
+ Dimitry Haskin, Tom Harsch, Christian Huitema, Tony Li, Greg
+ Minshall, Thomas Narten, Erik Nordmark, Yakov Rekhter, Bill Simpson,
+ Sue Thomson, Markku Savela, and Larry Masinter.
+
+<span class="h2"><h2><a name="section-2">2</a>. IPv6 Addressing</h2></span>
+
+ IPv6 addresses are 128-bit identifiers for interfaces and sets of
+ interfaces (where "interface" is as defined in <a href="#section-2">section 2</a> of [<a href="#ref-IPV6" title="&quot;Internet Protocol, Version 6 (IPv6) Specification&quot;">IPV6</a>]).
+ There are three types of addresses:
+
+ Unicast: An identifier for a single interface. A packet sent to a
+ unicast address is delivered to the interface identified
+ by that address.
+
+ Anycast: An identifier for a set of interfaces (typically belonging
+ to different nodes). A packet sent to an anycast address
+ is delivered to one of the interfaces identified by that
+ address (the "nearest" one, according to the routing
+ protocols' measure of distance).
+
+ Multicast: An identifier for a set of interfaces (typically belonging
+ to different nodes). A packet sent to a multicast address
+ is delivered to all interfaces identified by that address.
+
+ There are no broadcast addresses in IPv6, their function being
+ superseded by multicast addresses.
+
+ In this document, fields in addresses are given a specific name, for
+ example "subnet". When this name is used with the term "ID" for
+ identifier after the name (e.g., "subnet ID"), it refers to the
+ contents of the named field. When it is used with the term "prefix"
+ (e.g., "subnet prefix") it refers to all of the address from the left
+ up to and including this field.
+
+ In IPv6, all zeros and all ones are legal values for any field,
+ unless specifically excluded. Specifically, prefixes may contain, or
+ end with, zero-valued fields.
+
+
+
+
+
+<span class="grey">Hinden &amp; Deering Standards Track [Page 3]</span>
+<a name="page-4" id="page-4" href="#page-4"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
+
+
+<span class="h3"><h3><a name="section-2.1">2.1</a> Addressing Model</h3></span>
+
+ IPv6 addresses of all types are assigned to interfaces, not nodes.
+ An IPv6 unicast address refers to a single interface. Since each
+ interface belongs to a single node, any of that node's interfaces'
+ unicast addresses may be used as an identifier for the node.
+
+ All interfaces are required to have at least one link-local unicast
+ address (see <a href="#section-2.8">section 2.8</a> for additional required addresses). A
+ single interface may also have multiple IPv6 addresses of any type
+ (unicast, anycast, and multicast) or scope. Unicast addresses with
+ scope greater than link-scope are not needed for interfaces that are
+ not used as the origin or destination of any IPv6 packets to or from
+ non-neighbors. This is sometimes convenient for point-to-point
+ interfaces. There is one exception to this addressing model:
+
+ A unicast address or a set of unicast addresses may be assigned to
+ multiple physical interfaces if the implementation treats the
+ multiple physical interfaces as one interface when presenting it
+ to the internet layer. This is useful for load-sharing over
+ multiple physical interfaces.
+
+ Currently IPv6 continues the IPv4 model that a subnet prefix is
+ associated with one link. Multiple subnet prefixes may be assigned
+ to the same link.
+
+<span class="h3"><h3><a name="section-2.2">2.2</a> Text Representation of Addresses</h3></span>
+
+ There are three conventional forms for representing IPv6 addresses as
+ text strings:
+
+ 1. The preferred form is x:x:x:x:x:x:x:x, where the 'x's are the
+ hexadecimal values of the eight 16-bit pieces of the address.
+
+ Examples:
+
+ FEDC:BA98:7654:3210:FEDC:BA98:7654:3210
+
+ 1080:0:0:0:8:800:200C:417A
+
+ Note that it is not necessary to write the leading zeros in an
+ individual field, but there must be at least one numeral in every
+ field (except for the case described in 2.).
+
+ 2. Due to some methods of allocating certain styles of IPv6
+ addresses, it will be common for addresses to contain long strings
+ of zero bits. In order to make writing addresses containing zero
+ bits easier a special syntax is available to compress the zeros.
+
+
+
+<span class="grey">Hinden &amp; Deering Standards Track [Page 4]</span>
+<a name="page-5" id="page-5" href="#page-5"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
+
+
+ The use of "::" indicates one or more groups of 16 bits of zeros.
+ The "::" can only appear once in an address. The "::" can also be
+ used to compress leading or trailing zeros in an address.
+
+ For example, the following addresses:
+
+ 1080:0:0:0:8:800:200C:417A a unicast address
+ FF01:0:0:0:0:0:0:101 a multicast address
+ 0:0:0:0:0:0:0:1 the loopback address
+ 0:0:0:0:0:0:0:0 the unspecified address
+
+ may be represented as:
+
+ 1080::8:800:200C:417A a unicast address
+ FF01::101 a multicast address
+ ::1 the loopback address
+ :: the unspecified address
+
+ 3. An alternative form that is sometimes more convenient when dealing
+ with a mixed environment of IPv4 and IPv6 nodes is
+ x:x:x:x:x:x:d.d.d.d, where the 'x's are the hexadecimal values of
+ the six high-order 16-bit pieces of the address, and the 'd's are
+ the decimal values of the four low-order 8-bit pieces of the
+ address (standard IPv4 representation). Examples:
+
+ 0:0:0:0:0:0:13.1.68.3
+
+ 0:0:0:0:0:FFFF:129.144.52.38
+
+ or in compressed form:
+
+ ::13.1.68.3
+
+ ::FFFF:129.144.52.38
+
+<span class="h3"><h3><a name="section-2.3">2.3</a> Text Representation of Address Prefixes</h3></span>
+
+ The text representation of IPv6 address prefixes is similar to the
+ way IPv4 address prefixes are written in CIDR notation [<a href="#ref-CIDR" title="&quot;Classless Inter-Domain Routing (CIDR): An Address Assignment and Aggregation Strategy&quot;">CIDR</a>]. An
+ IPv6 address prefix is represented by the notation:
+
+ ipv6-address/prefix-length
+
+ where
+
+ ipv6-address is an IPv6 address in any of the notations listed
+ in <a href="#section-2.2">section 2.2</a>.
+
+
+
+
+<span class="grey">Hinden &amp; Deering Standards Track [Page 5]</span>
+<a name="page-6" id="page-6" href="#page-6"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
+
+
+ prefix-length is a decimal value specifying how many of the
+ leftmost contiguous bits of the address comprise
+ the prefix.
+
+ For example, the following are legal representations of the 60-bit
+ prefix 12AB00000000CD3 (hexadecimal):
+
+ 12AB:0000:0000:CD30:0000:0000:0000:0000/60
+ 12AB::CD30:0:0:0:0/60
+ 12AB:0:0:CD30::/60
+
+ The following are NOT legal representations of the above prefix:
+
+ 12AB:0:0:CD3/60 may drop leading zeros, but not trailing zeros,
+ within any 16-bit chunk of the address
+
+ 12AB::CD30/60 address to left of "/" expands to
+ 12AB:0000:0000:0000:0000:0000:0000:CD30
+
+ 12AB::CD3/60 address to left of "/" expands to
+ 12AB:0000:0000:0000:0000:0000:0000:0CD3
+
+ When writing both a node address and a prefix of that node address
+ (e.g., the node's subnet prefix), the two can be combined as follows:
+
+ the node address 12AB:0:0:CD30:123:4567:89AB:CDEF
+ and its subnet number 12AB:0:0:CD30::/60
+
+ can be abbreviated as 12AB:0:0:CD30:123:4567:89AB:CDEF/60
+
+<span class="h3"><h3><a name="section-2.4">2.4</a> Address Type Identification</h3></span>
+
+ The type of an IPv6 address is identified by the high-order bits of
+ the address, as follows:
+
+ Address type Binary prefix IPv6 notation Section
+ ------------ ------------- ------------- -------
+ Unspecified 00...0 (128 bits) ::/128 2.5.2
+ Loopback 00...1 (128 bits) ::1/128 2.5.3
+ Multicast 11111111 FF00::/8 2.7
+ Link-local unicast 1111111010 FE80::/10 2.5.6
+ Site-local unicast 1111111011 FEC0::/10 2.5.6
+ Global unicast (everything else)
+
+ Anycast addresses are taken from the unicast address spaces (of any
+ scope) and are not syntactically distinguishable from unicast
+ addresses.
+
+
+
+
+<span class="grey">Hinden &amp; Deering Standards Track [Page 6]</span>
+<a name="page-7" id="page-7" href="#page-7"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
+
+
+ The general format of global unicast addresses is described in
+ <a href="#section-2.5.4">section 2.5.4</a>. Some special-purpose subtypes of global unicast
+ addresses which contain embedded IPv4 addresses (for the purposes of
+ IPv4-IPv6 interoperation) are described in <a href="#section-2.5.5">section 2.5.5</a>.
+
+ Future specifications may redefine one or more sub-ranges of the
+ global unicast space for other purposes, but unless and until that
+ happens, implementations must treat all addresses that do not start
+ with any of the above-listed prefixes as global unicast addresses.
+
+<span class="h3"><h3><a name="section-2.5">2.5</a> Unicast Addresses</h3></span>
+
+ IPv6 unicast addresses are aggregable with prefixes of arbitrary
+ bit-length similar to IPv4 addresses under Classless Interdomain
+ Routing.
+
+ There are several types of unicast addresses in IPv6, in particular
+ global unicast, site-local unicast, and link-local unicast. There
+ are also some special-purpose subtypes of global unicast, such as
+ IPv6 addresses with embedded IPv4 addresses or encoded NSAP
+ addresses. Additional address types or subtypes can be defined in
+ the future.
+
+ IPv6 nodes may have considerable or little knowledge of the internal
+ structure of the IPv6 address, depending on the role the node plays
+ (for instance, host versus router). At a minimum, a node may
+ consider that unicast addresses (including its own) have no internal
+ structure:
+
+ | 128 bits |
+ +-----------------------------------------------------------------+
+ | node address |
+ +-----------------------------------------------------------------+
+
+ A slightly sophisticated host (but still rather simple) may
+ additionally be aware of subnet prefix(es) for the link(s) it is
+ attached to, where different addresses may have different values for
+ n:
+
+ | n bits | 128-n bits |
+ +------------------------------------------------+----------------+
+ | subnet prefix | interface ID |
+ +------------------------------------------------+----------------+
+
+ Though a very simple router may have no knowledge of the internal
+ structure of IPv6 unicast addresses, routers will more generally have
+ knowledge of one or more of the hierarchical boundaries for the
+ operation of routing protocols. The known boundaries will differ
+
+
+
+<span class="grey">Hinden &amp; Deering Standards Track [Page 7]</span>
+<a name="page-8" id="page-8" href="#page-8"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
+
+
+ from router to router, depending on what positions the router holds
+ in the routing hierarchy.
+
+<span class="h4"><h4><a name="section-2.5.1">2.5.1</a> Interface Identifiers</h4></span>
+
+ Interface identifiers in IPv6 unicast addresses are used to identify
+ interfaces on a link. They are required to be unique within a subnet
+ prefix. It is recommended that the same interface identifier not be
+ assigned to different nodes on a link. They may also be unique over
+ a broader scope. In some cases an interface's identifier will be
+ derived directly from that interface's link-layer address. The same
+ interface identifier may be used on multiple interfaces on a single
+ node, as long as they are attached to different subnets.
+
+ Note that the uniqueness of interface identifiers is independent of
+ the uniqueness of IPv6 addresses. For example, a global unicast
+ address may be created with a non-global scope interface identifier
+ and a site-local address may be created with a global scope interface
+ identifier.
+
+ For all unicast addresses, except those that start with binary value
+ 000, Interface IDs are required to be 64 bits long and to be
+ constructed in Modified EUI-64 format.
+
+ Modified EUI-64 format based Interface identifiers may have global
+ scope when derived from a global token (e.g., IEEE 802 48-bit MAC or
+ IEEE EUI-64 identifiers [<a href="#ref-EUI64" title="&quot;Guidelines for 64-bit Global Identifier (EUI-64) Registration Authority&quot;">EUI64</a>]) or may have local scope where a
+ global token is not available (e.g., serial links, tunnel end-points,
+ etc.) or where global tokens are undesirable (e.g., temporary tokens
+ for privacy [<a href="#ref-PRIV" title="&quot;Privacy Extensions for Stateless Address Autoconfiguration in IPv6&quot;">PRIV</a>]).
+
+ Modified EUI-64 format interface identifiers are formed by inverting
+ the "u" bit (universal/local bit in IEEE EUI-64 terminology) when
+ forming the interface identifier from IEEE EUI-64 identifiers. In
+ the resulting Modified EUI-64 format the "u" bit is set to one (1) to
+ indicate global scope, and it is set to zero (0) to indicate local
+ scope. The first three octets in binary of an IEEE EUI-64 identifier
+ are as follows:
+
+ 0 0 0 1 1 2
+ |0 7 8 5 6 3|
+ +----+----+----+----+----+----+
+ |cccc|ccug|cccc|cccc|cccc|cccc|
+ +----+----+----+----+----+----+
+
+ written in Internet standard bit-order, where "u" is the
+ universal/local bit, "g" is the individual/group bit, and "c" are the
+ bits of the company_id. Appendix A: "Creating Modified EUI-64 format
+
+
+
+<span class="grey">Hinden &amp; Deering Standards Track [Page 8]</span>
+<a name="page-9" id="page-9" href="#page-9"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
+
+
+ Interface Identifiers" provides examples on the creation of Modified
+ EUI-64 format based interface identifiers.
+
+ The motivation for inverting the "u" bit when forming an interface
+ identifier is to make it easy for system administrators to hand
+ configure non-global identifiers when hardware tokens are not
+ available. This is expected to be the case for serial links, tunnel end-
+ points, etc. The alternative would have been for these to be of the
+ form 0200:0:0:1, 0200:0:0:2, etc., instead of the much simpler 1, 2,
+ etc.
+
+ The use of the universal/local bit in the Modified EUI-64 format
+ identifier is to allow development of future technology that can take
+ advantage of interface identifiers with global scope.
+
+ The details of forming interface identifiers are defined in the
+ appropriate "IPv6 over &lt;link&gt;" specification such as "IPv6 over
+ Ethernet" [<a href="#ref-ETHER" title="&quot;Transmission of IPv6 Packets over Ethernet Networks&quot;">ETHER</a>], "IPv6 over FDDI" [<a href="#ref-FDDI" title="&quot;Transmission of IPv6 Packets over FDDI Networks&quot;">FDDI</a>], etc.
+
+<span class="h4"><h4><a name="section-2.5.2">2.5.2</a> The Unspecified Address</h4></span>
+
+ The address 0:0:0:0:0:0:0:0 is called the unspecified address. It
+ must never be assigned to any node. It indicates the absence of an
+ address. One example of its use is in the Source Address field of
+ any IPv6 packets sent by an initializing host before it has learned
+ its own address.
+
+ The unspecified address must not be used as the destination address
+ of IPv6 packets or in IPv6 Routing Headers. An IPv6 packet with a
+ source address of unspecified must never be forwarded by an IPv6
+ router.
+
+<span class="h4"><h4><a name="section-2.5.3">2.5.3</a> The Loopback Address</h4></span>
+
+ The unicast address 0:0:0:0:0:0:0:1 is called the loopback address.
+ It may be used by a node to send an IPv6 packet to itself. It may
+ never be assigned to any physical interface. It is treated as
+ having link-local scope, and may be thought of as the link-local
+ unicast address of a virtual interface (typically called "the
+ loopback interface") to an imaginary link that goes nowhere.
+
+ The loopback address must not be used as the source address in IPv6
+ packets that are sent outside of a single node. An IPv6 packet with
+ a destination address of loopback must never be sent outside of a
+ single node and must never be forwarded by an IPv6 router. A packet
+ received on an interface with destination address of loopback must be
+ dropped.
+
+
+
+
+<span class="grey">Hinden &amp; Deering Standards Track [Page 9]</span>
+<a name="page-10" id="page-10" href="#page-10"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
+
+
+<span class="h4"><h4><a name="section-2.5.4">2.5.4</a> Global Unicast Addresses</h4></span>
+
+ The general format for IPv6 global unicast addresses is as follows:
+
+ | n bits | m bits | 128-n-m bits |
+ +------------------------+-----------+----------------------------+
+ | global routing prefix | subnet ID | interface ID |
+ +------------------------+-----------+----------------------------+
+
+ where the global routing prefix is a (typically hierarchically-
+ structured) value assigned to a site (a cluster of subnets/links),
+ the subnet ID is an identifier of a link within the site, and the
+ interface ID is as defined in <a href="#section-2.5.1">section 2.5.1</a>.
+
+ All global unicast addresses other than those that start with binary
+ 000 have a 64-bit interface ID field (i.e., n + m = 64), formatted as
+ described in <a href="#section-2.5.1">section 2.5.1</a>. Global unicast addresses that start with
+ binary 000 have no such constraint on the size or structure of the
+ interface ID field.
+
+ Examples of global unicast addresses that start with binary 000 are
+ the IPv6 address with embedded IPv4 addresses described in section
+ 2.5.5 and the IPv6 address containing encoded NSAP addresses
+ specified in [<a href="#ref-NSAP" title="&quot;OSI NSAPs and IPv6&quot;">NSAP</a>]. An example of global addresses starting with a
+ binary value other than 000 (and therefore having a 64-bit interface
+ ID field) can be found in [<a href="#ref-AGGR" title="&quot;An Aggregatable Global Unicast Address Format&quot;">AGGR</a>].
+
+<span class="h4"><h4><a name="section-2.5.5">2.5.5</a> IPv6 Addresses with Embedded IPv4 Addresses</h4></span>
+
+ The IPv6 transition mechanisms [<a href="#ref-TRAN" title="&quot;Transition Mechanisms for IPv6 Hosts and Routers&quot;">TRAN</a>] include a technique for hosts
+ and routers to dynamically tunnel IPv6 packets over IPv4 routing
+ infrastructure. IPv6 nodes that use this technique are assigned
+ special IPv6 unicast addresses that carry a global IPv4 address in
+ the low-order 32 bits. This type of address is termed an "IPv4-
+ compatible IPv6 address" and has the format:
+
+ | 80 bits | 16 | 32 bits |
+ +--------------------------------------+--------------------------+
+ |0000..............................0000|0000| IPv4 address |
+ +--------------------------------------+----+---------------------+
+
+ Note: The IPv4 address used in the "IPv4-compatible IPv6 address"
+ must be a globally-unique IPv4 unicast address.
+
+ A second type of IPv6 address which holds an embedded IPv4 address is
+ also defined. This address type is used to represent the addresses
+ of IPv4 nodes as IPv6 addresses. This type of address is termed an
+ "IPv4-mapped IPv6 address" and has the format:
+
+
+
+<span class="grey">Hinden &amp; Deering Standards Track [Page 10]</span>
+<a name="page-11" id="page-11" href="#page-11"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
+
+
+ | 80 bits | 16 | 32 bits |
+ +--------------------------------------+--------------------------+
+ |0000..............................0000|FFFF| IPv4 address |
+ +--------------------------------------+----+---------------------+
+
+<span class="h4"><h4><a name="section-2.5.6">2.5.6</a> Local-Use IPv6 Unicast Addresses</h4></span>
+
+ There are two types of local-use unicast addresses defined. These
+ are Link-Local and Site-Local. The Link-Local is for use on a single
+ link and the Site-Local is for use in a single site. Link-Local
+ addresses have the following format:
+
+ | 10 |
+ | bits | 54 bits | 64 bits |
+ +----------+-------------------------+----------------------------+
+ |1111111010| 0 | interface ID |
+ +----------+-------------------------+----------------------------+
+
+ Link-Local addresses are designed to be used for addressing on a
+ single link for purposes such as automatic address configuration,
+ neighbor discovery, or when no routers are present.
+
+ Routers must not forward any packets with link-local source or
+ destination addresses to other links.
+
+ Site-Local addresses have the following format:
+
+ | 10 |
+ | bits | 54 bits | 64 bits |
+ +----------+-------------------------+----------------------------+
+ |1111111011| subnet ID | interface ID |
+ +----------+-------------------------+----------------------------+
+
+ Site-local addresses are designed to be used for addressing inside of
+ a site without the need for a global prefix. Although a subnet ID
+ may be up to 54-bits long, it is expected that globally-connected
+ sites will use the same subnet IDs for site-local and global
+ prefixes.
+
+ Routers must not forward any packets with site-local source or
+ destination addresses outside of the site.
+
+
+
+
+
+
+
+
+
+
+<span class="grey">Hinden &amp; Deering Standards Track [Page 11]</span>
+<a name="page-12" id="page-12" href="#page-12"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
+
+
+<span class="h3"><h3><a name="section-2.6">2.6</a> Anycast Addresses</h3></span>
+
+ An IPv6 anycast address is an address that is assigned to more than
+ one interface (typically belonging to different nodes), with the
+ property that a packet sent to an anycast address is routed to the
+ "nearest" interface having that address, according to the routing
+ protocols' measure of distance.
+
+ Anycast addresses are allocated from the unicast address space, using
+ any of the defined unicast address formats. Thus, anycast addresses
+ are syntactically indistinguishable from unicast addresses. When a
+ unicast address is assigned to more than one interface, thus turning
+ it into an anycast address, the nodes to which the address is
+ assigned must be explicitly configured to know that it is an anycast
+ address.
+
+ For any assigned anycast address, there is a longest prefix P of that
+ address that identifies the topological region in which all
+ interfaces belonging to that anycast address reside. Within the
+ region identified by P, the anycast address must be maintained as a
+ separate entry in the routing system (commonly referred to as a "host
+ route"); outside the region identified by P, the anycast address may
+ be aggregated into the routing entry for prefix P.
+
+ Note that in the worst case, the prefix P of an anycast set may be
+ the null prefix, i.e., the members of the set may have no topological
+ locality. In that case, the anycast address must be maintained as a
+ separate routing entry throughout the entire internet, which presents
+ a severe scaling limit on how many such "global" anycast sets may be
+ supported. Therefore, it is expected that support for global anycast
+ sets may be unavailable or very restricted.
+
+ One expected use of anycast addresses is to identify the set of
+ routers belonging to an organization providing internet service.
+ Such addresses could be used as intermediate addresses in an IPv6
+ Routing header, to cause a packet to be delivered via a particular
+ service provider or sequence of service providers.
+
+ Some other possible uses are to identify the set of routers attached
+ to a particular subnet, or the set of routers providing entry into a
+ particular routing domain.
+
+ There is little experience with widespread, arbitrary use of internet
+ anycast addresses, and some known complications and hazards when
+ using them in their full generality [<a href="#ref-ANYCST" title="&quot;Host Anycasting Service&quot;">ANYCST</a>]. Until more experience
+ has been gained and solutions are specified, the following
+ restrictions are imposed on IPv6 anycast addresses:
+
+
+
+
+<span class="grey">Hinden &amp; Deering Standards Track [Page 12]</span>
+<a name="page-13" id="page-13" href="#page-13"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
+
+
+ o An anycast address must not be used as the source address of an
+ IPv6 packet.
+
+ o An anycast address must not be assigned to an IPv6 host, that is,
+ it may be assigned to an IPv6 router only.
+
+<span class="h4"><h4><a name="section-2.6.1">2.6.1</a> Required Anycast Address</h4></span>
+
+ The Subnet-Router anycast address is predefined. Its format is as
+ follows:
+
+ | n bits | 128-n bits |
+ +------------------------------------------------+----------------+
+ | subnet prefix | 00000000000000 |
+ +------------------------------------------------+----------------+
+
+ The "subnet prefix" in an anycast address is the prefix which
+ identifies a specific link. This anycast address is syntactically
+ the same as a unicast address for an interface on the link with the
+ interface identifier set to zero.
+
+ Packets sent to the Subnet-Router anycast address will be delivered
+ to one router on the subnet. All routers are required to support the
+ Subnet-Router anycast addresses for the subnets to which they have
+ interfaces.
+
+ The subnet-router anycast address is intended to be used for
+ applications where a node needs to communicate with any one of the
+ set of routers.
+
+<span class="h3"><h3><a name="section-2.7">2.7</a> Multicast Addresses</h3></span>
+
+ An IPv6 multicast address is an identifier for a group of interfaces
+ (typically on different nodes). An interface may belong to any
+ number of multicast groups. Multicast addresses have the following
+ format:
+
+ | 8 | 4 | 4 | 112 bits |
+ +--------+----+----+---------------------------------------------+
+ |11111111|flgs|scop| group ID |
+ +--------+----+----+---------------------------------------------+
+
+ binary 11111111 at the start of the address identifies the
+ address as being a multicast address.
+
+ +-+-+-+-+
+ flgs is a set of 4 flags: |0|0|0|T|
+ +-+-+-+-+
+
+
+
+<span class="grey">Hinden &amp; Deering Standards Track [Page 13]</span>
+<a name="page-14" id="page-14" href="#page-14"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
+
+
+ The high-order 3 flags are reserved, and must be initialized
+ to 0.
+
+ T = 0 indicates a permanently-assigned ("well-known")
+ multicast address, assigned by the Internet Assigned Number
+ Authority (IANA).
+
+ T = 1 indicates a non-permanently-assigned ("transient")
+ multicast address.
+
+ scop is a 4-bit multicast scope value used to limit the scope
+ of the multicast group. The values are:
+
+ 0 reserved
+ 1 interface-local scope
+ 2 link-local scope
+ 3 reserved
+ 4 admin-local scope
+ 5 site-local scope
+ 6 (unassigned)
+ 7 (unassigned)
+ 8 organization-local scope
+ 9 (unassigned)
+ A (unassigned)
+ B (unassigned)
+ C (unassigned)
+ D (unassigned)
+ E global scope
+ F reserved
+
+ interface-local scope spans only a single interface on a
+ node, and is useful only for loopback transmission of
+ multicast.
+
+ link-local and site-local multicast scopes span the same
+ topological regions as the corresponding unicast scopes.
+
+ admin-local scope is the smallest scope that must be
+ administratively configured, i.e., not automatically derived
+ from physical connectivity or other, non-multicast-related
+ configuration.
+
+ organization-local scope is intended to span multiple sites
+ belonging to a single organization.
+
+ scopes labeled "(unassigned)" are available for
+ administrators to define additional multicast regions.
+
+
+
+
+<span class="grey">Hinden &amp; Deering Standards Track [Page 14]</span>
+<a name="page-15" id="page-15" href="#page-15"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
+
+
+ group ID identifies the multicast group, either permanent or
+ transient, within the given scope.
+
+ The "meaning" of a permanently-assigned multicast address is
+ independent of the scope value. For example, if the "NTP servers
+ group" is assigned a permanent multicast address with a group ID of
+ 101 (hex), then:
+
+ FF01:0:0:0:0:0:0:101 means all NTP servers on the same interface
+ (i.e., the same node) as the sender.
+
+ FF02:0:0:0:0:0:0:101 means all NTP servers on the same link as the
+ sender.
+
+ FF05:0:0:0:0:0:0:101 means all NTP servers in the same site as the
+ sender.
+
+ FF0E:0:0:0:0:0:0:101 means all NTP servers in the internet.
+
+ Non-permanently-assigned multicast addresses are meaningful only
+ within a given scope. For example, a group identified by the non-
+ permanent, site-local multicast address FF15:0:0:0:0:0:0:101 at one
+ site bears no relationship to a group using the same address at a
+ different site, nor to a non-permanent group using the same group ID
+ with different scope, nor to a permanent group with the same group
+ ID.
+
+ Multicast addresses must not be used as source addresses in IPv6
+ packets or appear in any Routing header.
+
+ Routers must not forward any multicast packets beyond the scope
+ indicated by the scop field in the destination multicast address.
+
+ Nodes must not originate a packet to a multicast address whose scop
+ field contains the reserved value 0; if such a packet is received, it
+ must be silently dropped. Nodes should not originate a packet to a
+ multicast address whose scop field contains the reserved value F; if
+ such a packet is sent or received, it must be treated the same as
+ packets destined to a global (scop E) multicast address.
+
+<span class="h4"><h4><a name="section-2.7.1">2.7.1</a> Pre-Defined Multicast Addresses</h4></span>
+
+ The following well-known multicast addresses are pre-defined. The
+ group IDs defined in this section are defined for explicit scope
+ values.
+
+ Use of these group IDs for any other scope values, with the T flag
+ equal to 0, is not allowed.
+
+
+
+<span class="grey">Hinden &amp; Deering Standards Track [Page 15]</span>
+<a name="page-16" id="page-16" href="#page-16"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
+
+
+ Reserved Multicast Addresses: FF00:0:0:0:0:0:0:0
+ FF01:0:0:0:0:0:0:0
+ FF02:0:0:0:0:0:0:0
+ FF03:0:0:0:0:0:0:0
+ FF04:0:0:0:0:0:0:0
+ FF05:0:0:0:0:0:0:0
+ FF06:0:0:0:0:0:0:0
+ FF07:0:0:0:0:0:0:0
+ FF08:0:0:0:0:0:0:0
+ FF09:0:0:0:0:0:0:0
+ FF0A:0:0:0:0:0:0:0
+ FF0B:0:0:0:0:0:0:0
+ FF0C:0:0:0:0:0:0:0
+ FF0D:0:0:0:0:0:0:0
+ FF0E:0:0:0:0:0:0:0
+ FF0F:0:0:0:0:0:0:0
+
+ The above multicast addresses are reserved and shall never be
+ assigned to any multicast group.
+
+ All Nodes Addresses: FF01:0:0:0:0:0:0:1
+ FF02:0:0:0:0:0:0:1
+
+ The above multicast addresses identify the group of all IPv6 nodes,
+ within scope 1 (interface-local) or 2 (link-local).
+
+ All Routers Addresses: FF01:0:0:0:0:0:0:2
+ FF02:0:0:0:0:0:0:2
+ FF05:0:0:0:0:0:0:2
+
+ The above multicast addresses identify the group of all IPv6 routers,
+ within scope 1 (interface-local), 2 (link-local), or 5 (site-local).
+
+ Solicited-Node Address: FF02:0:0:0:0:1:FFXX:XXXX
+
+ Solicited-node multicast addresses are computed as a function of a
+ node's unicast and anycast addresses. A solicited-node multicast
+ address is formed by taking the low-order 24 bits of an address
+ (unicast or anycast) and appending those bits to the prefix
+ FF02:0:0:0:0:1:FF00::/104 resulting in a multicast address in the
+ range
+
+ FF02:0:0:0:0:1:FF00:0000
+
+ to
+
+ FF02:0:0:0:0:1:FFFF:FFFF
+
+
+
+
+<span class="grey">Hinden &amp; Deering Standards Track [Page 16]</span>
+<a name="page-17" id="page-17" href="#page-17"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
+
+
+ For example, the solicited node multicast address corresponding to
+ the IPv6 address 4037::01:800:200E:8C6C is FF02::1:FF0E:8C6C. IPv6
+ addresses that differ only in the high-order bits, e.g., due to
+ multiple high-order prefixes associated with different aggregations,
+ will map to the same solicited-node address, thereby reducing the
+ number of multicast addresses a node must join.
+
+ A node is required to compute and join (on the appropriate interface)
+ the associated Solicited-Node multicast addresses for every unicast
+ and anycast address it is assigned.
+
+<span class="h3"><h3><a name="section-2.8">2.8</a> A Node's Required Addresses</h3></span>
+
+ A host is required to recognize the following addresses as
+ identifying itself:
+
+ o Its required Link-Local Address for each interface.
+ o Any additional Unicast and Anycast Addresses that have been
+ configured for the node's interfaces (manually or
+ automatically).
+ o The loopback address.
+ o The All-Nodes Multicast Addresses defined in <a href="#section-2.7.1">section 2.7.1</a>.
+ o The Solicited-Node Multicast Address for each of its unicast
+ and anycast addresses.
+ o Multicast Addresses of all other groups to which the node
+ belongs.
+
+ A router is required to recognize all addresses that a host is
+ required to recognize, plus the following addresses as identifying
+ itself:
+
+ o The Subnet-Router Anycast Addresses for all interfaces for
+ which it is configured to act as a router.
+ o All other Anycast Addresses with which the router has been
+ configured.
+ o The All-Routers Multicast Addresses defined in <a href="#section-2.7.1">section 2.7.1</a>.
+
+<span class="h2"><h2><a name="section-3">3</a>. Security Considerations</h2></span>
+
+ IPv6 addressing documents do not have any direct impact on Internet
+ infrastructure security. Authentication of IPv6 packets is defined
+ in [<a href="#ref-AUTH" title="&quot;IP Authentication Header&quot;">AUTH</a>].
+
+
+
+
+
+
+
+
+
+<span class="grey">Hinden &amp; Deering Standards Track [Page 17]</span>
+<a name="page-18" id="page-18" href="#page-18"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
+
+
+<span class="h2"><h2><a name="section-4">4</a>. IANA Considerations</h2></span>
+
+ The table and notes at <a href="http://www.isi.edu/in-notes/iana/assignments/ipv6-address-space.txt">http://www.isi.edu/in-</a>
+ <a href="http://www.isi.edu/in-notes/iana/assignments/ipv6-address-space.txt">notes/iana/assignments/ipv6-address-space.txt</a> should be replaced with
+ the following:
+
+ INTERNET PROTOCOL VERSION 6 ADDRESS SPACE
+
+ The initial assignment of IPv6 address space is as follows:
+
+ Allocation Prefix Fraction of
+ (binary) Address Space
+ ----------------------------------- -------- -------------
+ Unassigned (see Note 1 below) 0000 0000 1/256
+ Unassigned 0000 0001 1/256
+ Reserved for NSAP Allocation 0000 001 1/128 [<a href="http://tools.ietf.org/html/rfc1888">RFC1888</a>]
+ Unassigned 0000 01 1/64
+ Unassigned 0000 1 1/32
+ Unassigned 0001 1/16
+ Global Unicast 001 1/8 [<a href="http://tools.ietf.org/html/rfc2374">RFC2374</a>]
+ Unassigned 010 1/8
+ Unassigned 011 1/8
+ Unassigned 100 1/8
+ Unassigned 101 1/8
+ Unassigned 110 1/8
+ Unassigned 1110 1/16
+ Unassigned 1111 0 1/32
+ Unassigned 1111 10 1/64
+ Unassigned 1111 110 1/128
+ Unassigned 1111 1110 0 1/512
+ Link-Local Unicast Addresses 1111 1110 10 1/1024
+ Site-Local Unicast Addresses 1111 1110 11 1/1024
+ Multicast Addresses 1111 1111 1/256
+
+ Notes:
+
+ 1. The "unspecified address", the "loopback address", and the IPv6
+ Addresses with Embedded IPv4 Addresses are assigned out of the
+ 0000 0000 binary prefix space.
+
+ 2. For now, IANA should limit its allocation of IPv6 unicast address
+ space to the range of addresses that start with binary value 001.
+ The rest of the global unicast address space (approximately 85% of
+ the IPv6 address space) is reserved for future definition and use,
+ and is not to be assigned by IANA at this time.
+
+
+
+
+
+
+<span class="grey">Hinden &amp; Deering Standards Track [Page 18]</span>
+<a name="page-19" id="page-19" href="#page-19"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
+
+
+<span class="h2"><h2><a name="section-5">5</a>. References</h2></span>
+
+<span class="h3"><h3><a name="section-5.1">5.1</a> Normative References</h3></span>
+
+ [<a name="ref-IPV6" id="ref-IPV6">IPV6</a>] Deering, S. and R. Hinden, "Internet Protocol, Version 6
+ (IPv6) Specification", <a href="http://tools.ietf.org/html/rfc2460">RFC 2460</a>, December 1998.
+
+ [<a name="ref-RFC2026" id="ref-RFC2026">RFC2026</a>] Bradner, S., "The Internet Standards Process -- Revision
+ 3", <a href="http://tools.ietf.org/html/bcp9">BCP 9</a> , <a href="http://tools.ietf.org/html/rfc2026">RFC 2026</a>, October 1996.
+
+<span class="h3"><h3><a name="section-5.2">5.2</a> Informative References</h3></span>
+
+ [<a name="ref-ANYCST" id="ref-ANYCST">ANYCST</a>] Partridge, C., Mendez, T. and W. Milliken, "Host Anycasting
+ Service", <a href="http://tools.ietf.org/html/rfc1546">RFC 1546</a>, November 1993.
+
+ [<a name="ref-AUTH" id="ref-AUTH">AUTH</a>] Kent, S. and R. Atkinson, "IP Authentication Header", <a href="http://tools.ietf.org/html/rfc2402">RFC</a>
+ <a href="http://tools.ietf.org/html/rfc2402">2402</a>, November 1998.
+
+ [<a name="ref-AGGR" id="ref-AGGR">AGGR</a>] Hinden, R., O'Dell, M. and S. Deering, "An Aggregatable
+ Global Unicast Address Format", <a href="http://tools.ietf.org/html/rfc2374">RFC 2374</a>, July 1998.
+
+ [<a name="ref-CIDR" id="ref-CIDR">CIDR</a>] Fuller, V., Li, T., Yu, J. and K. Varadhan, "Classless
+ Inter-Domain Routing (CIDR): An Address Assignment and
+ Aggregation Strategy", <a href="http://tools.ietf.org/html/rfc1519">RFC 1519</a>, September 1993.
+
+ [<a name="ref-ETHER" id="ref-ETHER">ETHER</a>] Crawford, M., "Transmission of IPv6 Packets over Ethernet
+ Networks", <a href="http://tools.ietf.org/html/rfc2464">RFC 2464</a>, December 1998.
+
+ [<a name="ref-EUI64" id="ref-EUI64">EUI64</a>] IEEE, "Guidelines for 64-bit Global Identifier (EUI-64)
+ Registration Authority",
+ <a href="http://standards.ieee.org/regauth/oui/tutorials/EUI64.html">http://standards.ieee.org/regauth/oui/tutorials/EUI64.html</a>,
+ March 1997.
+
+ [<a name="ref-FDDI" id="ref-FDDI">FDDI</a>] Crawford, M., "Transmission of IPv6 Packets over FDDI
+ Networks", <a href="http://tools.ietf.org/html/rfc2467">RFC 2467</a>, December 1998.
+
+ [<a name="ref-MASGN" id="ref-MASGN">MASGN</a>] Hinden, R. and S. Deering, "IPv6 Multicast Address
+ Assignments", <a href="http://tools.ietf.org/html/rfc2375">RFC 2375</a>, July 1998.
+
+ [<a name="ref-NSAP" id="ref-NSAP">NSAP</a>] Bound, J., Carpenter, B., Harrington, D., Houldsworth, J.
+ and A. Lloyd, "OSI NSAPs and IPv6", <a href="http://tools.ietf.org/html/rfc1888">RFC 1888</a>, August 1996.
+
+ [<a name="ref-PRIV" id="ref-PRIV">PRIV</a>] Narten, T. and R. Draves, "Privacy Extensions for Stateless
+ Address Autoconfiguration in IPv6", <a href="http://tools.ietf.org/html/rfc3041">RFC 3041</a>, January 2001.
+
+ [<a name="ref-TOKEN" id="ref-TOKEN">TOKEN</a>] Crawford, M., Narten, T. and S. Thomas, "Transmission of
+ IPv6 Packets over Token Ring Networks", <a href="http://tools.ietf.org/html/rfc2470">RFC 2470</a>, December
+ 1998.
+
+
+
+<span class="grey">Hinden &amp; Deering Standards Track [Page 19]</span>
+<a name="page-20" id="page-20" href="#page-20"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
+
+
+ [<a name="ref-TRAN" id="ref-TRAN">TRAN</a>] Gilligan, R. and E. Nordmark, "Transition Mechanisms for
+ IPv6 Hosts and Routers", <a href="http://tools.ietf.org/html/rfc2893">RFC 2893</a>, August 2000.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+<span class="grey">Hinden &amp; Deering Standards Track [Page 20]</span>
+<a name="page-21" id="page-21" href="#page-21"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
+
+
+APPENDIX A: Creating Modified EUI-64 format Interface Identifiers
+
+ Depending on the characteristics of a specific link or node there are
+ a number of approaches for creating Modified EUI-64 format interface
+ identifiers. This appendix describes some of these approaches.
+
+Links or Nodes with IEEE EUI-64 Identifiers
+
+ The only change needed to transform an IEEE EUI-64 identifier to an
+ interface identifier is to invert the "u" (universal/local) bit. For
+ example, a globally unique IEEE EUI-64 identifier of the form:
+
+ |0 1|1 3|3 4|4 6|
+ |0 5|6 1|2 7|8 3|
+ +----------------+----------------+----------------+----------------+
+ |cccccc0gcccccccc|ccccccccmmmmmmmm|mmmmmmmmmmmmmmmm|mmmmmmmmmmmmmmmm|
+ +----------------+----------------+----------------+----------------+
+
+ where "c" are the bits of the assigned company_id, "0" is the value
+ of the universal/local bit to indicate global scope, "g" is
+ individual/group bit, and "m" are the bits of the manufacturer-
+ selected extension identifier. The IPv6 interface identifier would
+ be of the form:
+
+ |0 1|1 3|3 4|4 6|
+ |0 5|6 1|2 7|8 3|
+ +----------------+----------------+----------------+----------------+
+ |cccccc1gcccccccc|ccccccccmmmmmmmm|mmmmmmmmmmmmmmmm|mmmmmmmmmmmmmmmm|
+ +----------------+----------------+----------------+----------------+
+
+ The only change is inverting the value of the universal/local bit.
+
+Links or Nodes with IEEE 802 48 bit MAC's
+
+ [<a href="#ref-EUI64">EUI64</a>] defines a method to create an IEEE EUI-64 identifier from an
+ IEEE 48bit MAC identifier. This is to insert two octets, with
+ hexadecimal values of 0xFF and 0xFE, in the middle of the 48 bit MAC
+ (between the company_id and vendor supplied id). For example, the 48
+ bit IEEE MAC with global scope:
+
+
+
+
+
+
+
+
+
+
+
+
+<span class="grey">Hinden &amp; Deering Standards Track [Page 21]</span>
+<a name="page-22" id="page-22" href="#page-22"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
+
+
+ |0 1|1 3|3 4|
+ |0 5|6 1|2 7|
+ +----------------+----------------+----------------+
+ |cccccc0gcccccccc|ccccccccmmmmmmmm|mmmmmmmmmmmmmmmm|
+ +----------------+----------------+----------------+
+
+ where "c" are the bits of the assigned company_id, "0" is the value
+ of the universal/local bit to indicate global scope, "g" is
+ individual/group bit, and "m" are the bits of the manufacturer-
+ selected extension identifier. The interface identifier would be of
+ the form:
+
+ |0 1|1 3|3 4|4 6|
+ |0 5|6 1|2 7|8 3|
+ +----------------+----------------+----------------+----------------+
+ |cccccc1gcccccccc|cccccccc11111111|11111110mmmmmmmm|mmmmmmmmmmmmmmmm|
+ +----------------+----------------+----------------+----------------+
+
+ When IEEE 802 48bit MAC addresses are available (on an interface or a
+ node), an implementation may use them to create interface identifiers
+ due to their availability and uniqueness properties.
+
+Links with Other Kinds of Identifiers
+
+ There are a number of types of links that have link-layer interface
+ identifiers other than IEEE EUI-64 or IEEE 802 48-bit MACs. Examples
+ include LocalTalk and Arcnet. The method to create a Modified EUI-
+ 64 format identifier is to take the link identifier (e.g., the
+ LocalTalk 8 bit node identifier) and zero fill it to the left. For
+ example, a LocalTalk 8 bit node identifier of hexadecimal value 0x4F
+ results in the following interface identifier:
+
+ |0 1|1 3|3 4|4 6|
+ |0 5|6 1|2 7|8 3|
+ +----------------+----------------+----------------+----------------+
+ |0000000000000000|0000000000000000|0000000000000000|0000000001001111|
+ +----------------+----------------+----------------+----------------+
+
+ Note that this results in the universal/local bit set to "0" to
+ indicate local scope.
+
+Links without Identifiers
+
+ There are a number of links that do not have any type of built-in
+ identifier. The most common of these are serial links and configured
+ tunnels. Interface identifiers must be chosen that are unique within
+ a subnet-prefix.
+
+
+
+
+<span class="grey">Hinden &amp; Deering Standards Track [Page 22]</span>
+<a name="page-23" id="page-23" href="#page-23"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
+
+
+ When no built-in identifier is available on a link the preferred
+ approach is to use a global interface identifier from another
+ interface or one which is assigned to the node itself. When using
+ this approach no other interface connecting the same node to the same
+ subnet-prefix may use the same identifier.
+
+ If there is no global interface identifier available for use on the
+ link the implementation needs to create a local-scope interface
+ identifier. The only requirement is that it be unique within a
+ subnet prefix. There are many possible approaches to select a
+ subnet-prefix-unique interface identifier. These include:
+
+ Manual Configuration
+ Node Serial Number
+ Other node-specific token
+
+ The subnet-prefix-unique interface identifier should be generated in
+ a manner that it does not change after a reboot of a node or if
+ interfaces are added or deleted from the node.
+
+ The selection of the appropriate algorithm is link and implementation
+ dependent. The details on forming interface identifiers are defined
+ in the appropriate "IPv6 over &lt;link&gt;" specification. It is strongly
+ recommended that a collision detection algorithm be implemented as
+ part of any automatic algorithm.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+<span class="grey">Hinden &amp; Deering Standards Track [Page 23]</span>
+<a name="page-24" id="page-24" href="#page-24"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
+
+
+APPENDIX B: Changes from <a href="http://tools.ietf.org/html/rfc2373">RFC-2373</a>
+
+ The following changes were made from <a href="http://tools.ietf.org/html/rfc2373">RFC-2373</a> "IP Version 6
+ Addressing Architecture":
+
+ - Clarified text in <a href="#section-2.2">section 2.2</a> to allow "::" to represent one or
+ more groups of 16 bits of zeros.
+ - Changed uniqueness requirement of Interface Identifiers from
+ unique on a link to unique within a subnet prefix. Also added a
+ recommendation that the same interface identifier not be assigned
+ to different machines on a link.
+ - Change site-local format to make the subnet ID field 54-bit long
+ and remove the 38-bit zero's field.
+ - Added description of multicast scop values and rules to handle the
+ reserved scop value 0.
+ - Revised sections 2.4 and 2.5.6 to simplify and clarify how
+ different address types are identified. This was done to ensure
+ that implementations do not build in any knowledge about global
+ unicast format prefixes. Changes include:
+ o Removed Format Prefix (FP) terminology
+ o Revised list of address types to only include exceptions to
+ global unicast and a single entry that identifies everything
+ else as Global Unicast.
+ o Removed list of defined prefix exceptions from <a href="#section-2.5.6">section 2.5.6</a>
+ as it is now the main part of <a href="#section-2.4">section 2.4</a>.
+ - Clarified text relating to EUI-64 identifiers to distinguish
+ between IPv6's "Modified EUI-64 format" identifiers and IEEE EUI-
+ 64 identifiers.
+ - Combined the sections on the Global Unicast Addresses and NSAP
+ Addresses into a single section on Global Unicast Addresses,
+ generalized the Global Unicast format, and cited [<a href="#ref-AGGR" title="&quot;An Aggregatable Global Unicast Address Format&quot;">AGGR</a>] and [<a href="#ref-NSAP" title="&quot;OSI NSAPs and IPv6&quot;">NSAP</a>]
+ as examples.
+ - Reordered sections 2.5.4 and 2.5.5.
+ - Removed <a href="#section-2.7.2">section 2.7.2</a> Assignment of New IPv6 Multicast Addresses
+ because this is being redefined elsewhere.
+ - Added an IANA considerations section that updates the IANA IPv6
+ address allocations and documents the NSAP and AGGR allocations.
+ - Added clarification that the "IPv4-compatible IPv6 address" must
+ use global IPv4 unicast addresses.
+ - Divided references into normative and non-normative sections.
+ - Added reference to [<a href="#ref-PRIV" title="&quot;Privacy Extensions for Stateless Address Autoconfiguration in IPv6&quot;">PRIV</a>] in <a href="#section-2.5.1">section 2.5.1</a>
+ - Added clarification that routers must not forward multicast
+ packets outside of the scope indicated in the multicast address.
+ - Added clarification that routers must not forward packets with
+ source address of the unspecified address.
+ - Added clarification that routers must drop packets received on an
+ interface with destination address of loopback.
+ - Clarified the definition of IPv4-mapped addresses.
+
+
+
+<span class="grey">Hinden &amp; Deering Standards Track [Page 24]</span>
+<a name="page-25" id="page-25" href="#page-25"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
+
+
+ - Removed the ABNF Description of Text Representations Appendix.
+ - Removed the address block reserved for IPX addresses.
+ - Multicast scope changes:
+ o Changed name of scope value 1 from "node-local" to
+ "interface-local"
+ o Defined scope value 4 as "admin-local"
+ - Corrected reference to <a href="http://tools.ietf.org/html/rfc1933">RFC1933</a> and updated references.
+ - Many small changes to clarify and make the text more consistent.
+
+Authors' Addresses
+
+ Robert M. Hinden
+ Nokia
+ 313 Fairchild Drive
+ Mountain View, CA 94043
+ USA
+
+ Phone: +1 650 625-2004
+ EMail: hinden@iprg.nokia.com
+
+
+ Stephen E. Deering
+ Cisco Systems, Inc.
+ 170 West Tasman Drive
+ San Jose, CA 95134-1706
+ USA
+
+ Phone: +1 408 527-8213
+ EMail: deering@cisco.com
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+<span class="grey">Hinden &amp; Deering Standards Track [Page 25]</span>
+<a name="page-26" id="page-26" href="#page-26"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a> IPv6 Addressing Architecture April 2003</span>
+
+
+Full Copyright Statement
+
+ Copyright (C) The Internet Society (2003). All Rights Reserved.
+
+ This document and translations of it may be copied and furnished to
+ others, and derivative works that comment on or otherwise explain it
+ or assist in its implementation may be prepared, copied, published
+ and distributed, in whole or in part, without restriction of any
+ kind, provided that the above copyright notice and this paragraph are
+ included on all such copies and derivative works. However, this
+ document itself may not be modified in any way, such as by removing
+ the copyright notice or references to the Internet Society or other
+ Internet organizations, except as needed for the purpose of
+ developing Internet standards in which case the procedures for
+ copyrights defined in the Internet Standards process must be
+ followed, or as required to translate it into languages other than
+ English.
+
+ The limited permissions granted above are perpetual and will not be
+ revoked by the Internet Society or its successors or assigns.
+
+ This document and the information contained herein is provided on an
+ "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING
+ TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING
+ BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION
+ HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF
+ MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+
+Acknowledgement
+
+ Funding for the RFC Editor function is currently provided by the
+ Internet Society.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Hinden &amp; Deering Standards Track [Page 26]
+<span class="break"> </span>
+
+</pre><br>
+<span class="noprint"><small><small>Html markup produced by rfcmarkup 1.46, available from
+<a href="http://tools.ietf.org/tools/rfcmarkup/">http://tools.ietf.org/tools/rfcmarkup/</a>
+</small></small></span>
+
+</body></html> \ No newline at end of file
diff --git a/doc/rfc3986.htm b/doc/rfc3986.htm
new file mode 100644
index 0000000..b392007
--- /dev/null
+++ b/doc/rfc3986.htm
@@ -0,0 +1,3539 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xml:lang="en" lang="en"><head>
+
+
+ <meta http-equiv="Content-Type" content="text/html; charset=us-ascii">
+ <meta name="robots" content="index,follow">
+ <meta name="creator" content="rfcmarkup version 1.46">
+ <link rel="icon" href="http://tools.ietf.org/images/rfc.png" type="image/png">
+ <link rel="shortcut icon" href="http://tools.ietf.org/images/rfc.png" type="image/png"><title>RFC 3986 Uniform Resource Identifier (URI): Generic Syntax</title>
+
+
+ <style type="text/css">
+ body {
+ margin: 0px 8px;
+ font-size: 1em;
+ }
+ h1, h2, h3, h4, h5, h6, .h1, .h2, .h3, .h4, .h5, .h6 {
+ font-weight: bold;
+ line-height: 0pt;
+ display: inline;
+ white-space: pre;
+ font-family: monospace;
+ font-size: 1em;
+ font-weight: bold;
+ }
+ pre {
+ font-size: 1em;
+ }
+ .pre {
+ white-space: pre;
+ font-family: monospace;
+ }
+ .header{
+ font-weight: bold;
+ }
+ @media print {
+ body {
+ font-size: 10.5pt;
+ }
+ h1, h2, h3, h4, h5, h6 {
+ font-size: 10.5pt;
+ }
+
+ a:link, a:visited {
+ color: inherit;
+ text-decoration: none;
+ }
+ .break {
+ page-break-before: always;
+ text-decoration: none;
+ }
+ .noprint {
+ display: none;
+ }
+ }
+ @media screen {
+ .grey, .grey a:link, .grey a:visited {
+ color: #777;
+ }
+ .break {
+ text-decoration: none;
+ display: none;
+ }
+ .docinfo {
+ background-color: #EEE;
+ }
+ .top {
+ border-top: 2px solid #EEE;
+ }
+ .bgwhite { background-color: white; }
+ .bgred { background-color: #F44; }
+ .bggrey { background-color: #666; }
+ .bgbrown { background-color: #840; }
+ .bgorange { background-color: #FA0; }
+ .bgyellow { background-color: #EE0; }
+ .bgmagenta{ background-color: #F4F; }
+ .bgblue { background-color: #66F; }
+ .bgcyan { background-color: #4DD; }
+ .bggreen { background-color: #4F4; }
+
+ .legend { font-size: 90%; }
+ .cplate { font-size: 70%; border: solid grey 1px; }
+ }
+ </style>
+
+ <script type="text/javascript"><!--
+ function addHeaderTags() {
+ var spans = document.getElementsByTagName("span");
+ for (var i=0; i < spans.length; i++) {
+ var elem = spans[i];
+ if (elem) {
+ var level = elem.getAttribute("class");
+ if (level == "h1" || level == "h2" || level == "h3" || level == "h4" || level == "h5" || level == "h6") {
+ elem.innerHTML = "<"+level+">"+elem.innerHTML+"</"+level+">";
+ }
+ }
+ }
+ }
+ var legend_html = "Colour legend:<br /> <table> <tr><td>Unknown:</td> <td><span class='cplate bgwhite'>&nbsp;&nbsp;&nbsp;&nbsp;</span></td></tr> <tr><td>Draft:</td> <td><span class='cplate bgred'>&nbsp;&nbsp;&nbsp;&nbsp;</span></td></tr> <tr><td>Informational:</td> <td><span class='cplate bgorange'>&nbsp;&nbsp;&nbsp;&nbsp;</span></td></tr> <tr><td>Experimental:</td> <td><span class='cplate bgyellow'>&nbsp;&nbsp;&nbsp;&nbsp;</span></td></tr> <tr><td>Best Common Practice:</td><td><span class='cplate bgmagenta'>&nbsp;&nbsp;&nbsp;&nbsp;</span></td></tr> <tr><td>Proposed Standard:</td><td><span class='cplate bgblue'>&nbsp;&nbsp;&nbsp;&nbsp;</span></td></tr> <tr><td>Draft Standard:</td> <td><span class='cplate bgcyan'>&nbsp;&nbsp;&nbsp;&nbsp;</span></td></tr> <tr><td>Standard:</td> <td><span class='cplate bggreen'>&nbsp;&nbsp;&nbsp;&nbsp;</span></td></tr> <tr><td>Historic:</td> <td><span class='cplate bggrey'>&nbsp;&nbsp;&nbsp;&nbsp;</span></td></tr> <tr><td>Obsolete:</td> <td><span class='cplate bgbrown'>&nbsp;&nbsp;&nbsp;&nbsp;</span></td></tr> </table>"; // Markup of the colour-legend popup; showElem('legend') locates it through the variable name id + "_html".
+ function showElem(id) {
+ var elem = document.getElementById(id);
+ elem.innerHTML = eval(id+"_html");
+ elem.style.visibility='visible';
+ }
+ function hideElem(id) {
+ var elem = document.getElementById(id);
+ elem.style.visibility='hidden';
+ elem.innerHTML = "";
+ }
+ // -->
+ </script></head><body onload="addHeaderTags()">
+ <div style="height: 8px;">
+ <span style="cursor: pointer;" onmouseover="this.style.cursor='pointer';" onclick="showElem('legend');" onmouseout="hideElem('legend')" class="pre noprint docinfo bggreen" title="Click for colour legend."> </span>
+ <div id="legend" class="docinfo noprint pre legend" style="border: 1px solid rgb(51, 68, 85); padding: 4px 9px 5px 7px; position: absolute; top: 4px; left: 4ex; visibility: hidden; background-color: white;" onmouseover="showElem('legend');" onmouseout="hideElem('legend');"></div>
+ </div>
+<span class="pre noprint docinfo top">[<a href="http://tools.ietf.org/html/">RFCs/IDs</a>] [<a href="http://tools.ietf.org/rfc/rfc3986.txt">Plain Text</a>] [From <a href="http://tools.ietf.org/html/draft-fielding-uri-rfc2396bis">draft-fielding-uri-rfc2396bis</a>] </span><br>
+<span class="pre noprint docinfo"> </span><br>
+<span class="pre noprint docinfo"> STANDARD</span><br>
+<span class="pre noprint docinfo"> </span><br>
+<pre>Network Working Group T. Berners-Lee
+Request for Comments: 3986 W3C/MIT
+STD: 66 R. Fielding
+Updates: <a href="http://tools.ietf.org/html/rfc1738">1738</a> Day Software
+Obsoletes: <a href="http://tools.ietf.org/html/rfc2732">2732</a>, <a href="http://tools.ietf.org/html/rfc2396">2396</a>, <a href="http://tools.ietf.org/html/rfc1808">1808</a> L. Masinter
+Category: Standards Track Adobe Systems
+ January 2005
+
+
+ <span class="h1"><h1>Uniform Resource Identifier (URI): Generic Syntax</h1></span>
+
+Status of This Memo
+
+ This document specifies an Internet standards track protocol for the
+ Internet community, and requests discussion and suggestions for
+ improvements. Please refer to the current edition of the "Internet
+ Official Protocol Standards" (STD 1) for the standardization state
+ and status of this protocol. Distribution of this memo is unlimited.
+
+Copyright Notice
+
+ Copyright (C) The Internet Society (2005).
+
+Abstract
+
+ A Uniform Resource Identifier (URI) is a compact sequence of
+ characters that identifies an abstract or physical resource. This
+ specification defines the generic URI syntax and a process for
+ resolving URI references that might be in relative form, along with
+ guidelines and security considerations for the use of URIs on the
+ Internet. The URI syntax defines a grammar that is a superset of all
+ valid URIs, allowing an implementation to parse the common components
+ of a URI reference without knowing the scheme-specific requirements
+ of every possible identifier. This specification does not define a
+ generative grammar for URIs; that task is performed by the individual
+ specifications of each URI scheme.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 1]</span>
+<a name="page-2" id="page-2" href="#page-2"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+Table of Contents
+
+ <a href="#section-1">1</a>. Introduction . . . . . . . . . . . . . . . . . . . . . . . . . <a href="#page-4">4</a>
+ <a href="#section-1.1">1.1</a>. Overview of URIs . . . . . . . . . . . . . . . . . . . . <a href="#page-4">4</a>
+ <a href="#section-1.1.1">1.1.1</a>. Generic Syntax . . . . . . . . . . . . . . . . . <a href="#page-6">6</a>
+ <a href="#section-1.1.2">1.1.2</a>. Examples . . . . . . . . . . . . . . . . . . . . <a href="#page-7">7</a>
+ <a href="#section-1.1.3">1.1.3</a>. URI, URL, and URN . . . . . . . . . . . . . . . <a href="#page-7">7</a>
+ <a href="#section-1.2">1.2</a>. Design Considerations . . . . . . . . . . . . . . . . . <a href="#page-8">8</a>
+ <a href="#section-1.2.1">1.2.1</a>. Transcription . . . . . . . . . . . . . . . . . <a href="#page-8">8</a>
+ <a href="#section-1.2.2">1.2.2</a>. Separating Identification from Interaction . . . <a href="#page-9">9</a>
+ <a href="#section-1.2.3">1.2.3</a>. Hierarchical Identifiers . . . . . . . . . . . . <a href="#page-10">10</a>
+ <a href="#section-1.3">1.3</a>. Syntax Notation . . . . . . . . . . . . . . . . . . . . <a href="#page-11">11</a>
+ <a href="#section-2">2</a>. Characters . . . . . . . . . . . . . . . . . . . . . . . . . . <a href="#page-11">11</a>
+ <a href="#section-2.1">2.1</a>. Percent-Encoding . . . . . . . . . . . . . . . . . . . . <a href="#page-12">12</a>
+ <a href="#section-2.2">2.2</a>. Reserved Characters . . . . . . . . . . . . . . . . . . <a href="#page-12">12</a>
+ <a href="#section-2.3">2.3</a>. Unreserved Characters . . . . . . . . . . . . . . . . . <a href="#page-13">13</a>
+ <a href="#section-2.4">2.4</a>. When to Encode or Decode . . . . . . . . . . . . . . . . <a href="#page-14">14</a>
+ <a href="#section-2.5">2.5</a>. Identifying Data . . . . . . . . . . . . . . . . . . . . <a href="#page-14">14</a>
+ <a href="#section-3">3</a>. Syntax Components . . . . . . . . . . . . . . . . . . . . . . <a href="#page-16">16</a>
+ <a href="#section-3.1">3.1</a>. Scheme . . . . . . . . . . . . . . . . . . . . . . . . . <a href="#page-17">17</a>
+ <a href="#section-3.2">3.2</a>. Authority . . . . . . . . . . . . . . . . . . . . . . . <a href="#page-17">17</a>
+ <a href="#section-3.2.1">3.2.1</a>. User Information . . . . . . . . . . . . . . . . <a href="#page-18">18</a>
+ <a href="#section-3.2.2">3.2.2</a>. Host . . . . . . . . . . . . . . . . . . . . . . <a href="#page-18">18</a>
+ <a href="#section-3.2.3">3.2.3</a>. Port . . . . . . . . . . . . . . . . . . . . . . <a href="#page-22">22</a>
+ <a href="#section-3.3">3.3</a>. Path . . . . . . . . . . . . . . . . . . . . . . . . . . <a href="#page-22">22</a>
+ <a href="#section-3.4">3.4</a>. Query . . . . . . . . . . . . . . . . . . . . . . . . . <a href="#page-23">23</a>
+ <a href="#section-3.5">3.5</a>. Fragment . . . . . . . . . . . . . . . . . . . . . . . . <a href="#page-24">24</a>
+ <a href="#section-4">4</a>. Usage . . . . . . . . . . . . . . . . . . . . . . . . . . . . <a href="#page-25">25</a>
+ <a href="#section-4.1">4.1</a>. URI Reference . . . . . . . . . . . . . . . . . . . . . <a href="#page-25">25</a>
+ <a href="#section-4.2">4.2</a>. Relative Reference . . . . . . . . . . . . . . . . . . . <a href="#page-26">26</a>
+ <a href="#section-4.3">4.3</a>. Absolute URI . . . . . . . . . . . . . . . . . . . . . . <a href="#page-27">27</a>
+ <a href="#section-4.4">4.4</a>. Same-Document Reference . . . . . . . . . . . . . . . . <a href="#page-27">27</a>
+ <a href="#section-4.5">4.5</a>. Suffix Reference . . . . . . . . . . . . . . . . . . . . <a href="#page-27">27</a>
+ <a href="#section-5">5</a>. Reference Resolution . . . . . . . . . . . . . . . . . . . . . <a href="#page-28">28</a>
+ <a href="#section-5.1">5.1</a>. Establishing a Base URI . . . . . . . . . . . . . . . . <a href="#page-28">28</a>
+ <a href="#section-5.1.1">5.1.1</a>. Base URI Embedded in Content . . . . . . . . . . <a href="#page-29">29</a>
+ <a href="#section-5.1.2">5.1.2</a>. Base URI from the Encapsulating Entity . . . . . <a href="#page-29">29</a>
+ <a href="#section-5.1.3">5.1.3</a>. Base URI from the Retrieval URI . . . . . . . . <a href="#page-30">30</a>
+ <a href="#section-5.1.4">5.1.4</a>. Default Base URI . . . . . . . . . . . . . . . . <a href="#page-30">30</a>
+ <a href="#section-5.2">5.2</a>. Relative Resolution . . . . . . . . . . . . . . . . . . <a href="#page-30">30</a>
+ <a href="#section-5.2.1">5.2.1</a>. Pre-parse the Base URI . . . . . . . . . . . . . <a href="#page-31">31</a>
+ <a href="#section-5.2.2">5.2.2</a>. Transform References . . . . . . . . . . . . . . <a href="#page-31">31</a>
+ <a href="#section-5.2.3">5.2.3</a>. Merge Paths . . . . . . . . . . . . . . . . . . <a href="#page-32">32</a>
+ <a href="#section-5.2.4">5.2.4</a>. Remove Dot Segments . . . . . . . . . . . . . . <a href="#page-33">33</a>
+ <a href="#section-5.3">5.3</a>. Component Recomposition . . . . . . . . . . . . . . . . <a href="#page-35">35</a>
+ <a href="#section-5.4">5.4</a>. Reference Resolution Examples . . . . . . . . . . . . . <a href="#page-35">35</a>
+ <a href="#section-5.4.1">5.4.1</a>. Normal Examples . . . . . . . . . . . . . . . . <a href="#page-36">36</a>
+ <a href="#section-5.4.2">5.4.2</a>. Abnormal Examples . . . . . . . . . . . . . . . <a href="#page-36">36</a>
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 2]</span>
+<a name="page-3" id="page-3" href="#page-3"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+ <a href="#section-6">6</a>. Normalization and Comparison . . . . . . . . . . . . . . . . . <a href="#page-38">38</a>
+ <a href="#section-6.1">6.1</a>. Equivalence . . . . . . . . . . . . . . . . . . . . . . <a href="#page-38">38</a>
+ <a href="#section-6.2">6.2</a>. Comparison Ladder . . . . . . . . . . . . . . . . . . . <a href="#page-39">39</a>
+ <a href="#section-6.2.1">6.2.1</a>. Simple String Comparison . . . . . . . . . . . . <a href="#page-39">39</a>
+ <a href="#section-6.2.2">6.2.2</a>. Syntax-Based Normalization . . . . . . . . . . . <a href="#page-40">40</a>
+ <a href="#section-6.2.3">6.2.3</a>. Scheme-Based Normalization . . . . . . . . . . . <a href="#page-41">41</a>
+ <a href="#section-6.2.4">6.2.4</a>. Protocol-Based Normalization . . . . . . . . . . <a href="#page-42">42</a>
+ <a href="#section-7">7</a>. Security Considerations . . . . . . . . . . . . . . . . . . . <a href="#page-43">43</a>
+ <a href="#section-7.1">7.1</a>. Reliability and Consistency . . . . . . . . . . . . . . <a href="#page-43">43</a>
+ <a href="#section-7.2">7.2</a>. Malicious Construction . . . . . . . . . . . . . . . . . <a href="#page-43">43</a>
+ <a href="#section-7.3">7.3</a>. Back-End Transcoding . . . . . . . . . . . . . . . . . . <a href="#page-44">44</a>
+ <a href="#section-7.4">7.4</a>. Rare IP Address Formats . . . . . . . . . . . . . . . . <a href="#page-45">45</a>
+ <a href="#section-7.5">7.5</a>. Sensitive Information . . . . . . . . . . . . . . . . . <a href="#page-45">45</a>
+ <a href="#section-7.6">7.6</a>. Semantic Attacks . . . . . . . . . . . . . . . . . . . . <a href="#page-45">45</a>
+ <a href="#section-8">8</a>. IANA Considerations . . . . . . . . . . . . . . . . . . . . . <a href="#page-46">46</a>
+ <a href="#section-9">9</a>. Acknowledgements . . . . . . . . . . . . . . . . . . . . . . . <a href="#page-46">46</a>
+ <a href="#section-10">10</a>. References . . . . . . . . . . . . . . . . . . . . . . . . . . <a href="#page-46">46</a>
+ <a href="#section-10.1">10.1</a>. Normative References . . . . . . . . . . . . . . . . . . <a href="#page-46">46</a>
+ <a href="#section-10.2">10.2</a>. Informative References . . . . . . . . . . . . . . . . . <a href="#page-47">47</a>
+ A. Collected ABNF for URI . . . . . . . . . . . . . . . . . . . . <a href="#page-49">49</a>
+ B. Parsing a URI Reference with a Regular Expression . . . . . . <a href="#page-50">50</a>
+ C. Delimiting a URI in Context . . . . . . . . . . . . . . . . . <a href="#page-51">51</a>
+ D. Changes from <a href="http://tools.ietf.org/html/rfc2396">RFC 2396</a> . . . . . . . . . . . . . . . . . . . . <a href="#page-53">53</a>
+ D.1. Additions . . . . . . . . . . . . . . . . . . . . . . . <a href="#page-53">53</a>
+ D.2. Modifications . . . . . . . . . . . . . . . . . . . . . <a href="#page-53">53</a>
+ Index . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . <a href="#page-56">56</a>
+ Authors' Addresses . . . . . . . . . . . . . . . . . . . . . . . . <a href="#page-60">60</a>
+ Full Copyright Statement . . . . . . . . . . . . . . . . . . . . . <a href="#page-61">61</a>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 3]</span>
+<a name="page-4" id="page-4" href="#page-4"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+<span class="h2"><h2><a name="section-1">1</a>. Introduction</h2></span>
+
+ A Uniform Resource Identifier (URI) provides a simple and extensible
+ means for identifying a resource. This specification of URI syntax
+ and semantics is derived from concepts introduced by the World Wide
+ Web global information initiative, whose use of these identifiers
+ dates from 1990 and is described in "Universal Resource Identifiers
+ in WWW" [<a href="http://tools.ietf.org/html/rfc1630" title="&quot;Universal Resource Identifiers in WWW: A Unifying Syntax for the Expression of Names and Addresses of Objects on the Network as used in the World-Wide Web&quot;">RFC1630</a>]. The syntax is designed to meet the
+ recommendations laid out in "Functional Recommendations for Internet
+ Resource Locators" [<a href="http://tools.ietf.org/html/rfc1736" title="&quot;Functional Recommendations for Internet Resource Locators&quot;">RFC1736</a>] and "Functional Requirements for Uniform
+ Resource Names" [<a href="http://tools.ietf.org/html/rfc1737" title="&quot;Functional Requirements for Uniform Resource Names&quot;">RFC1737</a>].
+
+ This document obsoletes [<a href="http://tools.ietf.org/html/rfc2396" title="&quot;Uniform Resource Identifiers (URI): Generic Syntax&quot;">RFC2396</a>], which merged "Uniform Resource
+ Locators" [<a href="http://tools.ietf.org/html/rfc1738" title="&quot;Uniform Resource Locators (URL)&quot;">RFC1738</a>] and "Relative Uniform Resource Locators"
+ [<a href="http://tools.ietf.org/html/rfc1808" title="&quot;Relative Uniform Resource Locators&quot;">RFC1808</a>] in order to define a single, generic syntax for all URIs.
+ It obsoletes [<a href="http://tools.ietf.org/html/rfc2732" title="&quot;Format for Literal IPv6 Addresses in URL&amp;#39;s&quot;">RFC2732</a>], which introduced syntax for an IPv6 address.
+ It excludes portions of <a href="http://tools.ietf.org/html/rfc1738">RFC 1738</a> that defined the specific syntax of
+ individual URI schemes; those portions will be updated as separate
+ documents. The process for registration of new URI schemes is
+ defined separately by [<a href="#ref-BCP35" title="&quot;Registration Procedures for URL Scheme Names&quot;">BCP35</a>]. Advice for designers of new URI
+ schemes can be found in [<a href="http://tools.ietf.org/html/rfc2718" title="&quot;Guidelines for new URL Schemes&quot;">RFC2718</a>]. All significant changes from <a href="http://tools.ietf.org/html/rfc2396">RFC</a>
+ <a href="http://tools.ietf.org/html/rfc2396">2396</a> are noted in Appendix D.
+
+ This specification uses the terms "character" and "coded character
+ set" in accordance with the definitions provided in [<a href="#ref-BCP19" title="&quot;IANA Charset Registration Procedures&quot;">BCP19</a>], and
+ "character encoding" in place of what [<a href="#ref-BCP19" title="&quot;IANA Charset Registration Procedures&quot;">BCP19</a>] refers to as a
+ "charset".
+
+<span class="h3"><h3><a name="section-1.1">1.1</a>. Overview of URIs</h3></span>
+
+ URIs are characterized as follows:
+
+ Uniform
+
+ Uniformity provides several benefits. It allows different types
+ of resource identifiers to be used in the same context, even when
+ the mechanisms used to access those resources may differ. It
+ allows uniform semantic interpretation of common syntactic
+ conventions across different types of resource identifiers. It
+ allows introduction of new types of resource identifiers without
+ interfering with the way that existing identifiers are used. It
+ allows the identifiers to be reused in many different contexts,
+ thus permitting new applications or protocols to leverage a pre-
+ existing, large, and widely used set of resource identifiers.
+
+
+
+
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 4]</span>
+<a name="page-5" id="page-5" href="#page-5"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+ Resource
+
+ This specification does not limit the scope of what might be a
+ resource; rather, the term "resource" is used in a general sense
+ for whatever might be identified by a URI. Familiar examples
+ include an electronic document, an image, a source of information
+ with a consistent purpose (e.g., "today's weather report for Los
+ Angeles"), a service (e.g., an HTTP-to-SMS gateway), and a
+ collection of other resources. A resource is not necessarily
+ accessible via the Internet; e.g., human beings, corporations, and
+ bound books in a library can also be resources. Likewise,
+ abstract concepts can be resources, such as the operators and
+ operands of a mathematical equation, the types of a relationship
+ (e.g., "parent" or "employee"), or numeric values (e.g., zero,
+ one, and infinity).
+
+ Identifier
+
+ An identifier embodies the information required to distinguish
+ what is being identified from all other things within its scope of
+ identification. Our use of the terms "identify" and "identifying"
+ refer to this purpose of distinguishing one resource from all
+ other resources, regardless of how that purpose is accomplished
+ (e.g., by name, address, or context). These terms should not be
+ mistaken as an assumption that an identifier defines or embodies
+ the identity of what is referenced, though that may be the case
+ for some identifiers. Nor should it be assumed that a system
+ using URIs will access the resource identified: in many cases,
+ URIs are used to denote resources without any intention that they
+ be accessed. Likewise, the "one" resource identified might not be
+ singular in nature (e.g., a resource might be a named set or a
+ mapping that varies over time).
+
+ A URI is an identifier consisting of a sequence of characters
+ matching the syntax rule named &lt;URI&gt; in <a href="#section-3">Section 3</a>. It enables
+ uniform identification of resources via a separately defined
+ extensible set of naming schemes (<a href="#section-3.1">Section 3.1</a>). How that
+ identification is accomplished, assigned, or enabled is delegated to
+ each scheme specification.
+
+ This specification does not place any limits on the nature of a
+ resource, the reasons why an application might seek to refer to a
+ resource, or the kinds of systems that might use URIs for the sake of
+ identifying resources. This specification does not require that a
+ URI persists in identifying the same resource over time, though that
+ is a common goal of all URI schemes. Nevertheless, nothing in this
+
+
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 5]</span>
+<a name="page-6" id="page-6" href="#page-6"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+ specification prevents an application from limiting itself to
+ particular types of resources, or to a subset of URIs that maintains
+ characteristics desired by that application.
+
+ URIs have a global scope and are interpreted consistently regardless
+ of context, though the result of that interpretation may be in
+ relation to the end-user's context. For example, "<a href="http://localhost/">http://localhost/</a>"
+ has the same interpretation for every user of that reference, even
+ though the network interface corresponding to "localhost" may be
+ different for each end-user: interpretation is independent of access.
+ However, an action made on the basis of that reference will take
+ place in relation to the end-user's context, which implies that an
+ action intended to refer to a globally unique thing must use a URI
+ that distinguishes that resource from all other things. URIs that
+ identify in relation to the end-user's local context should only be
+ used when the context itself is a defining aspect of the resource,
+ such as when an on-line help manual refers to a file on the end-
+ user's file system (e.g., "file:///etc/hosts").
+
+<span class="h4"><h4><a name="section-1.1.1">1.1.1</a>. Generic Syntax</h4></span>
+
+ Each URI begins with a scheme name, as defined in <a href="#section-3.1">Section 3.1</a>, that
+ refers to a specification for assigning identifiers within that
+ scheme. As such, the URI syntax is a federated and extensible naming
+ system wherein each scheme's specification may further restrict the
+ syntax and semantics of identifiers using that scheme.
+
+ This specification defines those elements of the URI syntax that are
+ required of all URI schemes or are common to many URI schemes. It
+ thus defines the syntax and semantics needed to implement a scheme-
+ independent parsing mechanism for URI references, by which the
+ scheme-dependent handling of a URI can be postponed until the
+ scheme-dependent semantics are needed. Likewise, protocols and data
+ formats that make use of URI references can refer to this
+ specification as a definition for the range of syntax allowed for all
+ URIs, including those schemes that have yet to be defined. This
+ decouples the evolution of identification schemes from the evolution
+ of protocols, data formats, and implementations that make use of
+ URIs.
+
+ A parser of the generic URI syntax can parse any URI reference into
+ its major components. Once the scheme is determined, further
+ scheme-specific parsing can be performed on the components. In other
+ words, the URI generic syntax is a superset of the syntax of all URI
+ schemes.
+
+
+
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 6]</span>
+<a name="page-7" id="page-7" href="#page-7"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+<span class="h4"><h4><a name="section-1.1.2">1.1.2</a>. Examples</h4></span>
+
+ The following example URIs illustrate several URI schemes and
+ variations in their common syntax components:
+
+ <a href="ftp://ftp.is.co.za/rfc/rfc1808.txt">ftp://ftp.is.co.za/rfc/rfc1808.txt</a>
+
+ <a href="http://www.ietf.org/rfc/rfc2396.txt">http://www.ietf.org/rfc/rfc2396.txt</a>
+
+ ldap://[2001:db8::7]/c=GB?objectClass?one
+
+ mailto:John.Doe@example.com
+
+ news:comp.infosystems.www.servers.unix
+
+ tel:+1-816-555-1212
+
+ telnet://192.0.2.16:80/
+
+ urn:oasis:names:specification:docbook:dtd:xml:4.1.2
+
+
+<span class="h4"><h4><a name="section-1.1.3">1.1.3</a>. URI, URL, and URN</h4></span>
+
+ A URI can be further classified as a locator, a name, or both. The
+ term "Uniform Resource Locator" (URL) refers to the subset of URIs
+ that, in addition to identifying a resource, provide a means of
+ locating the resource by describing its primary access mechanism
+ (e.g., its network "location"). The term "Uniform Resource Name"
+ (URN) has been used historically to refer to both URIs under the
+ "urn" scheme [<a href="http://tools.ietf.org/html/rfc2141" title="&quot;URN Syntax&quot;">RFC2141</a>], which are required to remain globally unique
+ and persistent even when the resource ceases to exist or becomes
+ unavailable, and to any other URI with the properties of a name.
+
+ An individual scheme does not have to be classified as being just one
+ of "name" or "locator". Instances of URIs from any given scheme may
+ have the characteristics of names or locators or both, often
+ depending on the persistence and care in the assignment of
+ identifiers by the naming authority, rather than on any quality of
+ the scheme. Future specifications and related documentation should
+ use the general term "URI" rather than the more restrictive terms
+ "URL" and "URN" [<a href="http://tools.ietf.org/html/rfc3305" title="&quot;Report from the Joint W3C/IETF URI Planning Interest Group: Uniform Resource Identifiers (URIs), URLs, and Uniform Resource Names (URNs): Clarifications and Recommendations&quot;">RFC3305</a>].
+
+
+
+
+
+
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 7]</span>
+<a name="page-8" id="page-8" href="#page-8"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+<span class="h3"><h3><a name="section-1.2">1.2</a>. Design Considerations</h3></span>
+
+<span class="h4"><h4><a name="section-1.2.1">1.2.1</a>. Transcription</h4></span>
+
+ The URI syntax has been designed with global transcription as one of
+ its main considerations. A URI is a sequence of characters from a
+ very limited set: the letters of the basic Latin alphabet, digits,
+ and a few special characters. A URI may be represented in a variety
+ of ways; e.g., ink on paper, pixels on a screen, or a sequence of
+ character encoding octets. The interpretation of a URI depends only
+ on the characters used and not on how those characters are
+ represented in a network protocol.
+
+ The goal of transcription can be described by a simple scenario.
+ Imagine two colleagues, Sam and Kim, sitting in a pub at an
+ international conference and exchanging research ideas. Sam asks Kim
+ for a location to get more information, so Kim writes the URI for the
+ research site on a napkin. Upon returning home, Sam takes out the
+ napkin and types the URI into a computer, which then retrieves the
+ information to which Kim referred.
+
+ There are several design considerations revealed by the scenario:
+
+ o A URI is a sequence of characters that is not always represented
+ as a sequence of octets.
+
+ o A URI might be transcribed from a non-network source and thus
+ should consist of characters that are most likely able to be
+ entered into a computer, within the constraints imposed by
+ keyboards (and related input devices) across languages and
+ locales.
+
+ o A URI often has to be remembered by people, and it is easier for
+ people to remember a URI when it consists of meaningful or
+ familiar components.
+
+ These design considerations are not always in alignment. For
+ example, it is often the case that the most meaningful name for a URI
+ component would require characters that cannot be typed into some
+ systems. The ability to transcribe a resource identifier from one
+ medium to another has been considered more important than having a
+ URI consist of the most meaningful of components.
+
+ In local or regional contexts and with improving technology, users
+ might benefit from being able to use a wider range of characters;
+ such use is not defined by this specification. Percent-encoded
+ octets (<a href="#section-2.1">Section 2.1</a>) may be used within a URI to represent characters
+ outside the range of the US-ASCII coded character set if this
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 8]</span>
+<a name="page-9" id="page-9" href="#page-9"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+ representation is allowed by the scheme or by the protocol element in
+ which the URI is referenced. Such a definition should specify the
+ character encoding used to map those characters to octets prior to
+ being percent-encoded for the URI.
+
+<span class="h4"><h4><a name="section-1.2.2">1.2.2</a>. Separating Identification from Interaction</h4></span>
+
+ A common misunderstanding of URIs is that they are only used to refer
+ to accessible resources. The URI itself only provides
+ identification; access to the resource is neither guaranteed nor
+ implied by the presence of a URI. Instead, any operation associated
+ with a URI reference is defined by the protocol element, data format
+ attribute, or natural language text in which it appears.
+
+ Given a URI, a system may attempt to perform a variety of operations
+ on the resource, as might be characterized by words such as "access",
+ "update", "replace", or "find attributes". Such operations are
+ defined by the protocols that make use of URIs, not by this
+ specification. However, we do use a few general terms for describing
+ common operations on URIs. URI "resolution" is the process of
+ determining an access mechanism and the appropriate parameters
+ necessary to dereference a URI; this resolution may require several
+ iterations. To use that access mechanism to perform an action on the
+ URI's resource is to "dereference" the URI.
+
+ When URIs are used within information retrieval systems to identify
+ sources of information, the most common form of URI dereference is
+ "retrieval": making use of a URI in order to retrieve a
+ representation of its associated resource. A "representation" is a
+ sequence of octets, along with representation metadata describing
+ those octets, that constitutes a record of the state of the resource
+ at the time when the representation is generated. Retrieval is
+ achieved by a process that might include using the URI as a cache key
+ to check for a locally cached representation, resolution of the URI
+ to determine an appropriate access mechanism (if any), and
+ dereference of the URI for the sake of applying a retrieval
+ operation. Depending on the protocols used to perform the retrieval,
+ additional information might be supplied about the resource (resource
+ metadata) and its relation to other resources.
+
+ URI references in information retrieval systems are designed to be
+ late-binding: the result of an access is generally determined when it
+ is accessed and may vary over time or due to other aspects of the
+ interaction. These references are created in order to be used in the
+ future: what is being identified is not some specific result that was
+ obtained in the past, but rather some characteristic that is expected
+ to be true for future results. In such cases, the resource referred
+ to by the URI is actually a sameness of characteristics as observed
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 9]</span>
+<a name="page-10" id="page-10" href="#page-10"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+ over time, perhaps elucidated by additional comments or assertions
+ made by the resource provider.
+
+ Although many URI schemes are named after protocols, this does not
+ imply that use of these URIs will result in access to the resource
+ via the named protocol. URIs are often used simply for the sake of
+ identification. Even when a URI is used to retrieve a representation
+ of a resource, that access might be through gateways, proxies,
+ caches, and name resolution services that are independent of the
+ protocol associated with the scheme name. The resolution of some
+ URIs may require the use of more than one protocol (e.g., both DNS
+ and HTTP are typically used to access an "http" URI's origin server
+ when a representation isn't found in a local cache).
+
+<span class="h4"><h4><a name="section-1.2.3">1.2.3</a>. Hierarchical Identifiers</h4></span>
+
+ The URI syntax is organized hierarchically, with components listed in
+ order of decreasing significance from left to right. For some URI
+ schemes, the visible hierarchy is limited to the scheme itself:
+ everything after the scheme component delimiter (":") is considered
+ opaque to URI processing. Other URI schemes make the hierarchy
+ explicit and visible to generic parsing algorithms.
+
+ The generic syntax uses the slash ("/"), question mark ("?"), and
+ number sign ("#") characters to delimit components that are
+ significant to the generic parser's hierarchical interpretation of an
+ identifier. In addition to aiding the readability of such
+ identifiers through the consistent use of familiar syntax, this
+ uniform representation of hierarchy across naming schemes allows
+ scheme-independent references to be made relative to that hierarchy.
+
+ It is often the case that a group or "tree" of documents has been
+ constructed to serve a common purpose, wherein the vast majority of
+ URI references in these documents point to resources within the tree
+ rather than outside it. Similarly, documents located at a particular
+ site are much more likely to refer to other resources at that site
+ than to resources at remote sites. Relative referencing of URIs
+ allows document trees to be partially independent of their location
+ and access scheme. For instance, it is possible for a single set of
+ hypertext documents to be simultaneously accessible and traversable
+ via each of the "file", "http", and "ftp" schemes if the documents
+ refer to each other with relative references. Furthermore, such
+ document trees can be moved, as a whole, without changing any of the
+ relative references.
+
+ A relative reference (<a href="#section-4.2">Section 4.2</a>) refers to a resource by describing
+ the difference within a hierarchical name space between the reference
+ context and the target URI. The reference resolution algorithm,
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 10]</span>
+<a name="page-11" id="page-11" href="#page-11"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+ presented in <a href="#section-5">Section 5</a>, defines how such a reference is transformed
+ to the target URI. As relative references can only be used within
+ the context of a hierarchical URI, designers of new URI schemes
+ should use a syntax consistent with the generic syntax's hierarchical
+ components unless there are compelling reasons to forbid relative
+ referencing within that scheme.
+
+ NOTE: Previous specifications used the terms "partial URI" and
+ "relative URI" to denote a relative reference to a URI. As some
+ readers misunderstood those terms to mean that relative URIs are a
+ subset of URIs rather than a method of referencing URIs, this
+ specification simply refers to them as relative references.
+
+ All URI references are parsed by generic syntax parsers when used.
+ However, because hierarchical processing has no effect on an absolute
+ URI used in a reference unless it contains one or more dot-segments
+ (complete path segments of "." or "..", as described in <a href="#section-3.3">Section 3.3</a>),
+ URI scheme specifications can define opaque identifiers by
+ disallowing use of slash characters, question mark characters, and
+ the URIs "scheme:." and "scheme:..".
+
+<span class="h3"><h3><a name="section-1.3">1.3</a>. Syntax Notation</h3></span>
+
+ This specification uses the Augmented Backus-Naur Form (ABNF)
+ notation of [<a href="http://tools.ietf.org/html/rfc2234" title="&quot;Augmented BNF for Syntax Specifications: ABNF&quot;">RFC2234</a>], including the following core ABNF syntax rules
+ defined by that specification: ALPHA (letters), CR (carriage return),
+ DIGIT (decimal digits), DQUOTE (double quote), HEXDIG (hexadecimal
+ digits), LF (line feed), and SP (space). The complete URI syntax is
+ collected in Appendix A.
+
+<span class="h2"><h2><a name="section-2">2</a>. Characters</h2></span>
+
+ The URI syntax provides a method of encoding data, presumably for the
+ sake of identifying a resource, as a sequence of characters. The URI
+ characters are, in turn, frequently encoded as octets for transport
+ or presentation. This specification does not mandate any particular
+ character encoding for mapping between URI characters and the octets
+ used to store or transmit those characters. When a URI appears in a
+ protocol element, the character encoding is defined by that protocol;
+ without such a definition, a URI is assumed to be in the same
+ character encoding as the surrounding text.
+
+ The ABNF notation defines its terminal values to be non-negative
+ integers (codepoints) based on the US-ASCII coded character set
+ [<a href="#ref-ASCII" title="&quot;Coded Character Set -- 7-bit American Standard Code for Information Interchange&quot;">ASCII</a>]. Because a URI is a sequence of characters, we must invert
+ that relation in order to understand the URI syntax. Therefore, the
+
+
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 11]</span>
+<a name="page-12" id="page-12" href="#page-12"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+ integer values used by the ABNF must be mapped back to their
+ corresponding characters via US-ASCII in order to complete the syntax
+ rules.
+
+ A URI is composed from a limited set of characters consisting of
+ digits, letters, and a few graphic symbols. A reserved subset of
+ those characters may be used to delimit syntax components within a
+ URI while the remaining characters, including both the unreserved set
+ and those reserved characters not acting as delimiters, define each
+ component's identifying data.
+
+<span class="h3"><h3><a name="section-2.1">2.1</a>. Percent-Encoding</h3></span>
+
+ A percent-encoding mechanism is used to represent a data octet in a
+ component when that octet's corresponding character is outside the
+ allowed set or is being used as a delimiter of, or within, the
+ component. A percent-encoded octet is encoded as a character
+ triplet, consisting of the percent character "%" followed by the two
+ hexadecimal digits representing that octet's numeric value. For
+ example, "%20" is the percent-encoding for the binary octet
+ "00100000" (ABNF: %x20), which in US-ASCII corresponds to the space
+ character (SP). <a href="#section-2.4">Section 2.4</a> describes when percent-encoding and
+ decoding is applied.
+
+ pct-encoded = "%" HEXDIG HEXDIG
+
+ The uppercase hexadecimal digits 'A' through 'F' are equivalent to
+ the lowercase digits 'a' through 'f', respectively. If two URIs
+ differ only in the case of hexadecimal digits used in percent-encoded
+ octets, they are equivalent. For consistency, URI producers and
+ normalizers should use uppercase hexadecimal digits for all percent-
+ encodings.
+
+<span class="h3"><h3><a name="section-2.2">2.2</a>. Reserved Characters</h3></span>
+
+ URIs include components and subcomponents that are delimited by
+ characters in the "reserved" set. These characters are called
+ "reserved" because they may (or may not) be defined as delimiters by
+ the generic syntax, by each scheme-specific syntax, or by the
+ implementation-specific syntax of a URI's dereferencing algorithm.
+ If data for a URI component would conflict with a reserved
+ character's purpose as a delimiter, then the conflicting data must be
+ percent-encoded before the URI is formed.
+
+
+
+
+
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 12]</span>
+<a name="page-13" id="page-13" href="#page-13"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+ reserved = gen-delims / sub-delims
+
+ gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+
+ sub-delims = "!" / "$" / "&amp;" / "'" / "(" / ")"
+ / "*" / "+" / "," / ";" / "="
+
+ The purpose of reserved characters is to provide a set of delimiting
+ characters that are distinguishable from other data within a URI.
+ URIs that differ in the replacement of a reserved character with its
+ corresponding percent-encoded octet are not equivalent. Percent-
+ encoding a reserved character, or decoding a percent-encoded octet
+ that corresponds to a reserved character, will change how the URI is
+ interpreted by most applications. Thus, characters in the reserved
+ set are protected from normalization and are therefore safe to be
+ used by scheme-specific and producer-specific algorithms for
+ delimiting data subcomponents within a URI.
+
+ A subset of the reserved characters (gen-delims) is used as
+ delimiters of the generic URI components described in <a href="#section-3">Section 3</a>. A
+ component's ABNF syntax rule will not use the reserved or gen-delims
+ rule names directly; instead, each syntax rule lists the characters
+ allowed within that component (i.e., not delimiting it), and any of
+ those characters that are also in the reserved set are "reserved" for
+ use as subcomponent delimiters within the component. Only the most
+ common subcomponents are defined by this specification; other
+ subcomponents may be defined by a URI scheme's specification, or by
+ the implementation-specific syntax of a URI's dereferencing
+ algorithm, provided that such subcomponents are delimited by
+ characters in the reserved set allowed within that component.
+
+ URI producing applications should percent-encode data octets that
+ correspond to characters in the reserved set unless these characters
+ are specifically allowed by the URI scheme to represent data in that
+ component. If a reserved character is found in a URI component and
+ no delimiting role is known for that character, then it must be
+ interpreted as representing the data octet corresponding to that
+ character's encoding in US-ASCII.
+
+<span class="h3"><h3><a name="section-2.3">2.3</a>. Unreserved Characters</h3></span>
+
+ Characters that are allowed in a URI but do not have a reserved
+ purpose are called unreserved. These include uppercase and lowercase
+ letters, decimal digits, hyphen, period, underscore, and tilde.
+
+ unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+
+
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 13]</span>
+<a name="page-14" id="page-14" href="#page-14"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+ URIs that differ in the replacement of an unreserved character with
+ its corresponding percent-encoded US-ASCII octet are equivalent: they
+ identify the same resource. However, URI comparison implementations
+ do not always perform normalization prior to comparison (see Section
+ 6). For consistency, percent-encoded octets in the ranges of ALPHA
+ (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D), period (%2E),
+ underscore (%5F), or tilde (%7E) should not be created by URI
+ producers and, when found in a URI, should be decoded to their
+ corresponding unreserved characters by URI normalizers.
+
+<span class="h3"><h3><a name="section-2.4">2.4</a>. When to Encode or Decode</h3></span>
+
+ Under normal circumstances, the only time when octets within a URI
+ are percent-encoded is during the process of producing the URI from
+ its component parts. This is when an implementation determines which
+ of the reserved characters are to be used as subcomponent delimiters
+ and which can be safely used as data. Once produced, a URI is always
+ in its percent-encoded form.
+
+ When a URI is dereferenced, the components and subcomponents
+ significant to the scheme-specific dereferencing process (if any)
+ must be parsed and separated before the percent-encoded octets within
+ those components can be safely decoded, as otherwise the data may be
+ mistaken for component delimiters. The only exception is for
+ percent-encoded octets corresponding to characters in the unreserved
+ set, which can be decoded at any time. For example, the octet
+ corresponding to the tilde ("~") character is often encoded as "%7E"
+ by older URI processing implementations; the "%7E" can be replaced by
+ "~" without changing its interpretation.
+
+ Because the percent ("%") character serves as the indicator for
+ percent-encoded octets, it must be percent-encoded as "%25" for that
+ octet to be used as data within a URI. Implementations must not
+ percent-encode or decode the same string more than once, as decoding
+ an already decoded string might lead to misinterpreting a percent
+ data octet as the beginning of a percent-encoding, or vice versa in
+ the case of percent-encoding an already percent-encoded string.
+
+<span class="h3"><h3><a name="section-2.5">2.5</a>. Identifying Data</h3></span>
+
+ URI characters provide identifying data for each of the URI
+ components, serving as an external interface for identification
+ between systems. Although the presence and nature of the URI
+ production interface is hidden from clients that use its URIs (and is
+ thus beyond the scope of the interoperability requirements defined by
+ this specification), it is a frequent source of confusion and errors
+ in the interpretation of URI character issues. Implementers have to
+ be aware that there are multiple character encodings involved in the
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 14]</span>
+<a name="page-15" id="page-15" href="#page-15"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+ production and transmission of URIs: local name and data encoding,
+ public interface encoding, URI character encoding, data format
+ encoding, and protocol encoding.
+
+ Local names, such as file system names, are stored with a local
+ character encoding. URI producing applications (e.g., origin
+ servers) will typically use the local encoding as the basis for
+ producing meaningful names. The URI producer will transform the
+ local encoding to one that is suitable for a public interface and
+ then transform the public interface encoding into the restricted set
+ of URI characters (reserved, unreserved, and percent-encodings).
+ Those characters are, in turn, encoded as octets to be used as a
+ reference within a data format (e.g., a document charset), and such
+ data formats are often subsequently encoded for transmission over
+ Internet protocols.
+
+ For most systems, an unreserved character appearing within a URI
+ component is interpreted as representing the data octet corresponding
+ to that character's encoding in US-ASCII. Consumers of URIs assume
+ that the letter "X" corresponds to the octet "01011000", and even
+ when that assumption is incorrect, there is no harm in making it. A
+ system that internally provides identifiers in the form of a
+ different character encoding, such as EBCDIC, will generally perform
+ character translation of textual identifiers to UTF-8 [<a href="#ref-STD63" title="&quot;UTF-8, a transformation format of ISO 10646&quot;">STD63</a>] (or
+ some other superset of the US-ASCII character encoding) at an
+ internal interface, thereby providing more meaningful identifiers
+ than those resulting from simply percent-encoding the original
+ octets.
+
+ For example, consider an information service that provides data,
+ stored locally using an EBCDIC-based file system, to clients on the
+ Internet through an HTTP server. When an author creates a file with
+ the name "Laguna Beach" on that file system, the "http" URI
+ corresponding to that resource is expected to contain the meaningful
+ string "Laguna%20Beach". If, however, that server produces URIs by
+ using an overly simplistic raw octet mapping, then the result would
+ be a URI containing "%D3%81%87%A4%95%81@%C2%85%81%83%88". An
+ internal transcoding interface fixes this problem by transcoding the
+ local name to a superset of US-ASCII prior to producing the URI.
+ Naturally, proper interpretation of an incoming URI on such an
+ interface requires that percent-encoded octets be decoded (e.g.,
+ "%20" to SP) before the reverse transcoding is applied to obtain the
+ local name.
+
+ In some cases, the internal interface between a URI component and the
+ identifying data that it has been crafted to represent is much less
+ direct than a character encoding translation. For example, portions
+ of a URI might reflect a query on non-ASCII data, or numeric
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 15]</span>
+<a name="page-16" id="page-16" href="#page-16"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+ coordinates on a map. Likewise, a URI scheme may define components
+ with additional encoding requirements that are applied prior to
+ forming the component and producing the URI.
+
+ When a new URI scheme defines a component that represents textual
+ data consisting of characters from the Universal Character Set [<a href="#ref-UCS" title="&quot;Information Technology - Universal Multiple-Octet Coded Character Set (UCS)&quot;">UCS</a>],
+ the data should first be encoded as octets according to the UTF-8
+ character encoding [<a href="#ref-STD63" title="&quot;UTF-8, a transformation format of ISO 10646&quot;">STD63</a>]; then only those octets that do not
+ correspond to characters in the unreserved set should be percent-
+ encoded. For example, the character A would be represented as "A",
+ the character LATIN CAPITAL LETTER A WITH GRAVE would be represented
+ as "%C3%80", and the character KATAKANA LETTER A would be represented
+ as "%E3%82%A2".
+
+<span class="h2"><h2><a name="section-3">3</a>. Syntax Components</h2></span>
+
+ The generic URI syntax consists of a hierarchical sequence of
+ components referred to as the scheme, authority, path, query, and
+ fragment.
+
+ URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
+
+ hier-part = "//" authority path-abempty
+ / path-absolute
+ / path-rootless
+ / path-empty
+
+ The scheme and path components are required, though the path may be
+ empty (no characters). When authority is present, the path must
+ either be empty or begin with a slash ("/") character. When
+ authority is not present, the path cannot begin with two slash
+ characters ("//"). These restrictions result in five different ABNF
+ rules for a path (<a href="#section-3.3">Section 3.3</a>), only one of which will match any
+ given URI reference.
+
+ The following are two example URIs and their component parts:
+
+ foo://example.com:8042/over/there?name=ferret#nose
+ \_/ \______________/\_________/ \_________/ \__/
+ | | | | |
+ scheme authority path query fragment
+ | _____________________|__
+ / \ / \
+ urn:example:animal:ferret:nose
+
+
+
+
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 16]</span>
+<a name="page-17" id="page-17" href="#page-17"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+<span class="h3"><h3><a name="section-3.1">3.1</a>. Scheme</h3></span>
+
+ Each URI begins with a scheme name that refers to a specification for
+ assigning identifiers within that scheme. As such, the URI syntax is
+ a federated and extensible naming system wherein each scheme's
+ specification may further restrict the syntax and semantics of
+ identifiers using that scheme.
+
+ Scheme names consist of a sequence of characters beginning with a
+ letter and followed by any combination of letters, digits, plus
+ ("+"), period ("."), or hyphen ("-"). Although schemes are case-
+ insensitive, the canonical form is lowercase and documents that
+ specify schemes must do so with lowercase letters. An implementation
+ should accept uppercase letters as equivalent to lowercase in scheme
+ names (e.g., allow "HTTP" as well as "http") for the sake of
+ robustness but should only produce lowercase scheme names for
+ consistency.
+
+ scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+
+ Individual schemes are not specified by this document. The process
+ for registration of new URI schemes is defined separately by [<a href="#ref-BCP35" title="&quot;Registration Procedures for URL Scheme Names&quot;">BCP35</a>].
+ The scheme registry maintains the mapping between scheme names and
+ their specifications. Advice for designers of new URI schemes can be
+ found in [<a href="http://tools.ietf.org/html/rfc2718" title="&quot;Guidelines for new URL Schemes&quot;">RFC2718</a>]. URI scheme specifications must define their own
+ syntax so that all strings matching their scheme-specific syntax will
+ also match the &lt;absolute-URI&gt; grammar, as described in <a href="#section-4.3">Section 4.3</a>.
+
+ When presented with a URI that violates one or more scheme-specific
+ restrictions, the scheme-specific resolution process should flag the
+ reference as an error rather than ignore the unused parts; doing so
+ reduces the number of equivalent URIs and helps detect abuses of the
+ generic syntax, which might indicate that the URI has been
+ constructed to mislead the user (<a href="#section-7.6">Section 7.6</a>).
+
+<span class="h3"><h3><a name="section-3.2">3.2</a>. Authority</h3></span>
+
+ Many URI schemes include a hierarchical element for a naming
+ authority so that governance of the name space defined by the
+ remainder of the URI is delegated to that authority (which may, in
+ turn, delegate it further). The generic syntax provides a common
+ means for distinguishing an authority based on a registered name or
+ server address, along with optional port and user information.
+
+ The authority component is preceded by a double slash ("//") and is
+ terminated by the next slash ("/"), question mark ("?"), or number
+ sign ("#") character, or by the end of the URI.
+
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 17]</span>
+<a name="page-18" id="page-18" href="#page-18"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+ authority = [ userinfo "@" ] host [ ":" port ]
+
+ URI producers and normalizers should omit the ":" delimiter that
+ separates host from port if the port component is empty. Some
+ schemes do not allow the userinfo and/or port subcomponents.
+
+ If a URI contains an authority component, then the path component
+ must either be empty or begin with a slash ("/") character. Non-
+ validating parsers (those that merely separate a URI reference into
+ its major components) will often ignore the subcomponent structure of
+ authority, treating it as an opaque string from the double-slash to
+ the first terminating delimiter, until such time as the URI is
+ dereferenced.
+
+<span class="h4"><h4><a name="section-3.2.1">3.2.1</a>. User Information</h4></span>
+
+ The userinfo subcomponent may consist of a user name and, optionally,
+ scheme-specific information about how to gain authorization to access
+ the resource. The user information, if present, is followed by a
+ commercial at-sign ("@") that delimits it from the host.
+
+ userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
+
+ Use of the format "user:password" in the userinfo field is
+ deprecated. Applications should not render as clear text any data
+ after the first colon (":") character found within a userinfo
+ subcomponent unless the data after the colon is the empty string
+ (indicating no password). Applications may choose to ignore or
+ reject such data when it is received as part of a reference and
+ should reject the storage of such data in unencrypted form. The
+ passing of authentication information in clear text has proven to be
+ a security risk in almost every case where it has been used.
+
+ Applications that render a URI for the sake of user feedback, such as
+ in graphical hypertext browsing, should render userinfo in a way that
+ is distinguished from the rest of a URI, when feasible. Such
+ rendering will assist the user in cases where the userinfo has been
+ misleadingly crafted to look like a trusted domain name
+ (<a href="#section-7.6">Section 7.6</a>).
+
+<span class="h4"><h4><a name="section-3.2.2">3.2.2</a>. Host</h4></span>
+
+ The host subcomponent of authority is identified by an IP literal
+ encapsulated within square brackets, an IPv4 address in dotted-
+ decimal form, or a registered name. The host subcomponent is case-
+ insensitive. The presence of a host subcomponent within a URI does
+ not imply that the scheme requires access to the given host on the
+ Internet. In many cases, the host syntax is used only for the sake
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 18]</span>
+<a name="page-19" id="page-19" href="#page-19"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+ of reusing the existing registration process created and deployed for
+ DNS, thus obtaining a globally unique name without the cost of
+ deploying another registry. However, such use comes with its own
+ costs: domain name ownership may change over time for reasons not
+ anticipated by the URI producer. In other cases, the data within the
+ host component identifies a registered name that has nothing to do
+ with an Internet host. We use the name "host" for the ABNF rule
+ because that is its most common purpose, not its only purpose.
+
+ host = IP-literal / IPv4address / reg-name
+
+ The syntax rule for host is ambiguous because it does not completely
+ distinguish between an IPv4address and a reg-name. In order to
+ disambiguate the syntax, we apply the "first-match-wins" algorithm:
+ If host matches the rule for IPv4address, then it should be
+ considered an IPv4 address literal and not a reg-name. Although host
+ is case-insensitive, producers and normalizers should use lowercase
+ for registered names and hexadecimal addresses for the sake of
+ uniformity, while only using uppercase letters for percent-encodings.
+
+ A host identified by an Internet Protocol literal address, version 6
+ [<a href="http://tools.ietf.org/html/rfc3513" title="&quot;Internet Protocol Version 6 (IPv6) Addressing Architecture&quot;">RFC3513</a>] or later, is distinguished by enclosing the IP literal
+ within square brackets ("[" and "]"). This is the only place where
+ square bracket characters are allowed in the URI syntax. In
+ anticipation of future, as-yet-undefined IP literal address formats,
+ an implementation may use an optional version flag to indicate such a
+ format explicitly rather than rely on heuristic determination.
+
+ IP-literal = "[" ( IPv6address / IPvFuture ) "]"
+
+ IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
+
+ The version flag does not indicate the IP version; rather, it
+ indicates future versions of the literal format. As such,
+ implementations must not provide the version flag for the existing
+ IPv4 and IPv6 literal address forms described below. If a URI
+ containing an IP-literal that starts with "v" (case-insensitive),
+ indicating that the version flag is present, is dereferenced by an
+ application that does not know the meaning of that version flag, then
+ the application should return an appropriate error for "address
+ mechanism not supported".
+
+ A host identified by an IPv6 literal address is represented inside
+ the square brackets without a preceding version flag. The ABNF
+ provided here is a translation of the text definition of an IPv6
+ literal address provided in [<a href="http://tools.ietf.org/html/rfc3513" title="&quot;Internet Protocol Version 6 (IPv6) Addressing Architecture&quot;">RFC3513</a>]. This syntax does not support
+ IPv6 scoped addressing zone identifiers.
+
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 19]</span>
+<a name="page-20" id="page-20" href="#page-20"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+ A 128-bit IPv6 address is divided into eight 16-bit pieces. Each
+ piece is represented numerically in case-insensitive hexadecimal,
+ using one to four hexadecimal digits (leading zeroes are permitted).
+ The eight encoded pieces are given most-significant first, separated
+ by colon characters. Optionally, the least-significant two pieces
+ may instead be represented in IPv4 address textual format. A
+ sequence of one or more consecutive zero-valued 16-bit pieces within
+ the address may be elided, omitting all their digits and leaving
+ exactly two consecutive colons in their place to mark the elision.
+
+ IPv6address = 6( h16 ":" ) ls32
+ / "::" 5( h16 ":" ) ls32
+ / [ h16 ] "::" 4( h16 ":" ) ls32
+ / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
+ / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
+ / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
+ / [ *4( h16 ":" ) h16 ] "::" ls32
+ / [ *5( h16 ":" ) h16 ] "::" h16
+ / [ *6( h16 ":" ) h16 ] "::"
+
+ ls32 = ( h16 ":" h16 ) / IPv4address
+ ; least-significant 32 bits of address
+
+ h16 = 1*4HEXDIG
+ ; 16 bits of address represented in hexadecimal
+
+ A host identified by an IPv4 literal address is represented in
+ dotted-decimal notation (a sequence of four decimal numbers in the
+ range 0 to 255, separated by "."), as described in [<a href="http://tools.ietf.org/html/rfc1123" title="&quot;Requirements for Internet Hosts - Application and Support&quot;">RFC1123</a>] by
+ reference to [<a href="http://tools.ietf.org/html/rfc0952" title="&quot;DoD Internet host table specification&quot;">RFC0952</a>]. Note that other forms of dotted notation may
+ be interpreted on some platforms, as described in <a href="#section-7.4">Section 7.4</a>, but
+ only the dotted-decimal form of four octets is allowed by this
+ grammar.
+
+ IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
+
+ dec-octet = DIGIT ; 0-9
+ / %x31-39 DIGIT ; 10-99
+ / "1" 2DIGIT ; 100-199
+ / "2" %x30-34 DIGIT ; 200-249
+ / "25" %x30-35 ; 250-255
+
+ A host identified by a registered name is a sequence of characters
+ usually intended for lookup within a locally defined host or service
+ name registry, though the URI's scheme-specific semantics may require
+ that a specific registry (or fixed name table) be used instead. The
+ most common name registry mechanism is the Domain Name System (DNS).
+ A registered name intended for lookup in the DNS uses the syntax
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 20]</span>
+<a name="page-21" id="page-21" href="#page-21"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+ defined in <a href="http://tools.ietf.org/html/rfc1034#section-3.5">Section 3.5</a> of [<a href="http://tools.ietf.org/html/rfc1034" title="&quot;Domain names - concepts and facilities&quot;">RFC1034</a>] and <a href="http://tools.ietf.org/html/rfc1123#section-2.1">Section 2.1</a> of [<a href="http://tools.ietf.org/html/rfc1123" title="&quot;Requirements for Internet Hosts - Application and Support&quot;">RFC1123</a>].
+ Such a name consists of a sequence of domain labels separated by ".",
+ each domain label starting and ending with an alphanumeric character
+ and possibly also containing "-" characters. The rightmost domain
+ label of a fully qualified domain name in DNS may be followed by a
+ single "." and should be if it is necessary to distinguish between
+ the complete domain name and some local domain.
+
+ reg-name = *( unreserved / pct-encoded / sub-delims )
+
+ If the URI scheme defines a default for host, then that default
+ applies when the host subcomponent is undefined or when the
+ registered name is empty (zero length). For example, the "file" URI
+ scheme is defined so that no authority, an empty host, and
+ "localhost" all mean the end-user's machine, whereas the "http"
+ scheme considers a missing authority or empty host invalid.
+
+ This specification does not mandate a particular registered name
+ lookup technology and therefore does not restrict the syntax of reg-
+ name beyond what is necessary for interoperability. Instead, it
+ delegates the issue of registered name syntax conformance to the
+ operating system of each application performing URI resolution, and
+ that operating system decides what it will allow for the purpose of
+ host identification. A URI resolution implementation might use DNS,
+ host tables, yellow pages, NetInfo, WINS, or any other system for
+ lookup of registered names. However, a globally scoped naming
+ system, such as DNS fully qualified domain names, is necessary for
+ URIs intended to have global scope. URI producers should use names
+ that conform to the DNS syntax, even when use of DNS is not
+ immediately apparent, and should limit these names to no more than
+ 255 characters in length.
+
+ The reg-name syntax allows percent-encoded octets in order to
+ represent non-ASCII registered names in a uniform way that is
+ independent of the underlying name resolution technology. Non-ASCII
+ characters must first be encoded according to UTF-8 [<a href="#ref-STD63" title="&quot;UTF-8, a transformation format of ISO 10646&quot;">STD63</a>], and then
+ each octet of the corresponding UTF-8 sequence must be percent-
+ encoded to be represented as URI characters. URI producing
+ applications must not use percent-encoding in host unless it is used
+ to represent a UTF-8 character sequence. When a non-ASCII registered
+ name represents an internationalized domain name intended for
+ resolution via the DNS, the name must be transformed to the IDNA
+ encoding [<a href="http://tools.ietf.org/html/rfc3490" title="&quot;Internationalizing Domain Names in Applications (IDNA)&quot;">RFC3490</a>] prior to name lookup. URI producers should
+ provide these registered names in the IDNA encoding, rather than a
+ percent-encoding, if they wish to maximize interoperability with
+ legacy URI resolvers.
+
+
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 21]</span>
+<a name="page-22" id="page-22" href="#page-22"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+<span class="h4"><h4><a name="section-3.2.3">3.2.3</a>. Port</h4></span>
+
+ The port subcomponent of authority is designated by an optional port
+ number in decimal following the host and delimited from it by a
+ single colon (":") character.
+
+ port = *DIGIT
+
+ A scheme may define a default port. For example, the "http" scheme
+ defines a default port of "80", corresponding to its reserved TCP
+ port number. The type of port designated by the port number (e.g.,
+ TCP, UDP, SCTP) is defined by the URI scheme. URI producers and
+ normalizers should omit the port component and its ":" delimiter if
+ port is empty or if its value would be the same as that of the
+ scheme's default.
+
+<span class="h3"><h3><a name="section-3.3">3.3</a>. Path</h3></span>
+
+ The path component contains data, usually organized in hierarchical
+ form, that, along with data in the non-hierarchical query component
+ (<a href="#section-3.4">Section 3.4</a>), serves to identify a resource within the scope of the
+ URI's scheme and naming authority (if any). The path is terminated
+ by the first question mark ("?") or number sign ("#") character, or
+ by the end of the URI.
+
+ If a URI contains an authority component, then the path component
+ must either be empty or begin with a slash ("/") character. If a URI
+ does not contain an authority component, then the path cannot begin
+ with two slash characters ("//"). In addition, a URI reference
+ (<a href="#section-4.1">Section 4.1</a>) may be a relative-path reference, in which case the
+ first path segment cannot contain a colon (":") character. The ABNF
+ requires five separate rules to disambiguate these cases, only one of
+ which will match the path substring within a given URI reference. We
+ use the generic term "path component" to describe the URI substring
+ matched by the parser to one of these rules.
+
+ path = path-abempty ; begins with "/" or is empty
+ / path-absolute ; begins with "/" but not "//"
+ / path-noscheme ; begins with a non-colon segment
+ / path-rootless ; begins with a segment
+ / path-empty ; zero characters
+
+ path-abempty = *( "/" segment )
+ path-absolute = "/" [ segment-nz *( "/" segment ) ]
+ path-noscheme = segment-nz-nc *( "/" segment )
+ path-rootless = segment-nz *( "/" segment )
+ path-empty = 0&lt;pchar&gt;
+
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 22]</span>
+<a name="page-23" id="page-23" href="#page-23"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+ segment = *pchar
+ segment-nz = 1*pchar
+ segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
+ ; non-zero-length segment without any colon ":"
+
+ pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
+
+ A path consists of a sequence of path segments separated by a slash
+ ("/") character. A path is always defined for a URI, though the
+ defined path may be empty (zero length). Use of the slash character
+ to indicate hierarchy is only required when a URI will be used as the
+ context for relative references. For example, the URI
+ &lt;mailto:fred@example.com&gt; has a path of "fred@example.com", whereas
+ the URI &lt;foo://info.example.com?fred&gt; has an empty path.
+
+ The path segments "." and "..", also known as dot-segments, are
+ defined for relative reference within the path name hierarchy. They
+ are intended for use at the beginning of a relative-path reference
+ (<a href="#section-4.2">Section 4.2</a>) to indicate relative position within the hierarchical
+ tree of names. This is similar to their role within some operating
+ systems' file directory structures to indicate the current directory
+ and parent directory, respectively. However, unlike in a file
+ system, these dot-segments are only interpreted within the URI path
+ hierarchy and are removed as part of the resolution process (Section
+ 5.2).
+
+ Aside from dot-segments in hierarchical paths, a path segment is
+ considered opaque by the generic syntax. URI producing applications
+ often use the reserved characters allowed in a segment to delimit
+ scheme-specific or dereference-handler-specific subcomponents. For
+ example, the semicolon (";") and equals ("=") reserved characters are
+ often used to delimit parameters and parameter values applicable to
+ that segment. The comma (",") reserved character is often used for
+ similar purposes. For example, one URI producer might use a segment
+ such as "name;v=1.1" to indicate a reference to version 1.1 of
+ "name", whereas another might use a segment such as "name,1.1" to
+ indicate the same. Parameter types may be defined by scheme-specific
+ semantics, but in most cases the syntax of a parameter is specific to
+ the implementation of the URI's dereferencing algorithm.
+
+<span class="h3"><h3><a name="section-3.4">3.4</a>. Query</h3></span>
+
+ The query component contains non-hierarchical data that, along with
+ data in the path component (<a href="#section-3.3">Section 3.3</a>), serves to identify a
+ resource within the scope of the URI's scheme and naming authority
+ (if any). The query component is indicated by the first question
+ mark ("?") character and terminated by a number sign ("#") character
+ or by the end of the URI.
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 23]</span>
+<a name="page-24" id="page-24" href="#page-24"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+ query = *( pchar / "/" / "?" )
+
+ The characters slash ("/") and question mark ("?") may represent data
+ within the query component. Beware that some older, erroneous
+ implementations may not handle such data correctly when it is used as
+ the base URI for relative references (<a href="#section-5.1">Section 5.1</a>), apparently
+ because they fail to distinguish query data from path data when
+ looking for hierarchical separators. However, as query components
+ are often used to carry identifying information in the form of
+ "key=value" pairs and one frequently used value is a reference to
+ another URI, it is sometimes better for usability to avoid percent-
+ encoding those characters.
+
+<span class="h3"><h3><a name="section-3.5">3.5</a>. Fragment</h3></span>
+
+ The fragment identifier component of a URI allows indirect
+ identification of a secondary resource by reference to a primary
+ resource and additional identifying information. The identified
+ secondary resource may be some portion or subset of the primary
+ resource, some view on representations of the primary resource, or
+ some other resource defined or described by those representations. A
+ fragment identifier component is indicated by the presence of a
+ number sign ("#") character and terminated by the end of the URI.
+
+ fragment = *( pchar / "/" / "?" )
+
+ The semantics of a fragment identifier are defined by the set of
+ representations that might result from a retrieval action on the
+ primary resource. The fragment's format and resolution is therefore
+ dependent on the media type [<a href="http://tools.ietf.org/html/rfc2046" title="&quot;Multipurpose Internet Mail Extensions (MIME) Part Two: Media Types&quot;">RFC2046</a>] of a potentially retrieved
+ representation, even though such a retrieval is only performed if the
+ URI is dereferenced. If no such representation exists, then the
+ semantics of the fragment are considered unknown and are effectively
+ unconstrained. Fragment identifier semantics are independent of the
+ URI scheme and thus cannot be redefined by scheme specifications.
+
+ Individual media types may define their own restrictions on or
+ structures within the fragment identifier syntax for specifying
+ different types of subsets, views, or external references that are
+ identifiable as secondary resources by that media type. If the
+ primary resource has multiple representations, as is often the case
+ for resources whose representation is selected based on attributes of
+ the retrieval request (a.k.a., content negotiation), then whatever is
+ identified by the fragment should be consistent across all of those
+ representations. Each representation should either define the
+ fragment so that it corresponds to the same secondary resource,
+ regardless of how it is represented, or should leave the fragment
+ undefined (i.e., not found).
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 24]</span>
+<a name="page-25" id="page-25" href="#page-25"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+ As with any URI, use of a fragment identifier component does not
+ imply that a retrieval action will take place. A URI with a fragment
+ identifier may be used to refer to the secondary resource without any
+ implication that the primary resource is accessible or will ever be
+ accessed.
+
+ Fragment identifiers have a special role in information retrieval
+ systems as the primary form of client-side indirect referencing,
+ allowing an author to specifically identify aspects of an existing
+ resource that are only indirectly provided by the resource owner. As
+ such, the fragment identifier is not used in the scheme-specific
+ processing of a URI; instead, the fragment identifier is separated
+ from the rest of the URI prior to a dereference, and thus the
+ identifying information within the fragment itself is dereferenced
+ solely by the user agent, regardless of the URI scheme. Although
+ this separate handling is often perceived to be a loss of
+ information, particularly for accurate redirection of references as
+ resources move over time, it also serves to prevent information
+ providers from denying reference authors the right to refer to
+ information within a resource selectively. Indirect referencing also
+ provides additional flexibility and extensibility to systems that use
+ URIs, as new media types are easier to define and deploy than new
+ schemes of identification.
+
+ The characters slash ("/") and question mark ("?") are allowed to
+ represent data within the fragment identifier. Beware that some
+ older, erroneous implementations may not handle this data correctly
+ when it is used as the base URI for relative references (Section
+ 5.1).
+
+<span class="h2"><h2><a name="section-4">4</a>. Usage</h2></span>
+
+ When applications make reference to a URI, they do not always use the
+ full form of reference defined by the "URI" syntax rule. To save
+ space and take advantage of hierarchical locality, many Internet
+ protocol elements and media type formats allow an abbreviation of a
+ URI, whereas others restrict the syntax to a particular form of URI.
+ We define the most common forms of reference syntax in this
+ specification because they impact and depend upon the design of the
+ generic syntax, requiring a uniform parsing algorithm in order to be
+ interpreted consistently.
+
+<span class="h3"><h3><a name="section-4.1">4.1</a>. URI Reference</h3></span>
+
+ URI-reference is used to denote the most common usage of a resource
+ identifier.
+
+ URI-reference = URI / relative-ref
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 25]</span>
+<a name="page-26" id="page-26" href="#page-26"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+ A URI-reference is either a URI or a relative reference. If the
+ URI-reference's prefix does not match the syntax of a scheme followed
+ by its colon separator, then the URI-reference is a relative
+ reference.
+
+ A URI-reference is typically parsed first into the five URI
+ components, in order to determine what components are present and
+ whether the reference is relative. Then, each component is parsed
+ for its subparts and their validation. The ABNF of URI-reference,
+ along with the "first-match-wins" disambiguation rule, is sufficient
+ to define a validating parser for the generic syntax. Readers
+ familiar with regular expressions should see Appendix B for an
+ example of a non-validating URI-reference parser that will take any
+ given string and extract the URI components.
+
+<span class="h3"><h3><a name="section-4.2">4.2</a>. Relative Reference</h3></span>
+
+ A relative reference takes advantage of the hierarchical syntax
+ (<a href="#section-1.2.3">Section 1.2.3</a>) to express a URI reference relative to the name space
+ of another hierarchical URI.
+
+ relative-ref = relative-part [ "?" query ] [ "#" fragment ]
+
+ relative-part = "//" authority path-abempty
+ / path-absolute
+ / path-noscheme
+ / path-empty
+
+ The URI referred to by a relative reference, also known as the target
+ URI, is obtained by applying the reference resolution algorithm of
+ <a href="#section-5">Section 5</a>.
+
+ A relative reference that begins with two slash characters is termed
+ a network-path reference; such references are rarely used. A
+ relative reference that begins with a single slash character is
+ termed an absolute-path reference. A relative reference that does
+ not begin with a slash character is termed a relative-path reference.
+
+ A path segment that contains a colon character (e.g., "this:that")
+ cannot be used as the first segment of a relative-path reference, as
+ it would be mistaken for a scheme name. Such a segment must be
+ preceded by a dot-segment (e.g., "./this:that") to make a relative-
+ path reference.
+
+
+
+
+
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 26]</span>
+<a name="page-27" id="page-27" href="#page-27"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+<span class="h3"><h3><a name="section-4.3">4.3</a>. Absolute URI</h3></span>
+
+ Some protocol elements allow only the absolute form of a URI without
+ a fragment identifier. For example, defining a base URI for later
+ use by relative references calls for an absolute-URI syntax rule that
+ does not allow a fragment.
+
+ absolute-URI = scheme ":" hier-part [ "?" query ]
+
+ URI scheme specifications must define their own syntax so that all
+ strings matching their scheme-specific syntax will also match the
+ &lt;absolute-URI&gt; grammar. Scheme specifications will not define
+ fragment identifier syntax or usage, regardless of its applicability
+ to resources identifiable via that scheme, as fragment identification
+ is orthogonal to scheme definition. However, scheme specifications
+ are encouraged to include a wide range of examples, including
+ examples that show use of the scheme's URIs with fragment identifiers
+ when such usage is appropriate.
+
+<span class="h3"><h3><a name="section-4.4">4.4</a>. Same-Document Reference</h3></span>
+
+ When a URI reference refers to a URI that is, aside from its fragment
+ component (if any), identical to the base URI (<a href="#section-5.1">Section 5.1</a>), that
+ reference is called a "same-document" reference. The most frequent
+ examples of same-document references are relative references that are
+ empty or include only the number sign ("#") separator followed by a
+ fragment identifier.
+
+ When a same-document reference is dereferenced for a retrieval
+ action, the target of that reference is defined to be within the same
+ entity (representation, document, or message) as the reference;
+ therefore, a dereference should not result in a new retrieval action.
+
+ Normalization of the base and target URIs prior to their comparison,
+ as described in Sections 6.2.2 and 6.2.3, is allowed but rarely
+ performed in practice. Normalization may increase the set of same-
+ document references, which may be of benefit to some caching
+ applications. As such, reference authors should not assume that a
+ slightly different, though equivalent, reference URI will (or will
+ not) be interpreted as a same-document reference by any given
+ application.
+
+<span class="h3"><h3><a name="section-4.5">4.5</a>. Suffix Reference</h3></span>
+
+ The URI syntax is designed for unambiguous reference to resources and
+ extensibility via the URI scheme. However, as URI identification and
+ usage have become commonplace, traditional media (television, radio,
+ newspapers, billboards, etc.) have increasingly used a suffix of the
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 27]</span>
+<a name="page-28" id="page-28" href="#page-28"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+ URI as a reference, consisting of only the authority and path
+ portions of the URI, such as
+
+ www.w3.org/Addressing/
+
+ or simply a DNS registered name on its own. Such references are
+ primarily intended for human interpretation rather than for machines,
+ with the assumption that context-based heuristics are sufficient to
+ complete the URI (e.g., most registered names beginning with "www"
+ are likely to have a URI prefix of "http://"). Although there is no
+ standard set of heuristics for disambiguating a URI suffix, many
+ client implementations allow them to be entered by the user and
+ heuristically resolved.
+
+ Although this practice of using suffix references is common, it
+ should be avoided whenever possible and should never be used in
+ situations where long-term references are expected. The heuristics
+ noted above will change over time, particularly when a new URI scheme
+ becomes popular, and are often incorrect when used out of context.
+ Furthermore, they can lead to security issues along the lines of
+ those described in [<a href="http://tools.ietf.org/html/rfc1535" title="&quot;A Security Problem and Proposed Correction With Widely Deployed DNS Software&quot;">RFC1535</a>].
+
+ As a URI suffix has the same syntax as a relative-path reference, a
+ suffix reference cannot be used in contexts where a relative
+ reference is expected. As a result, suffix references are limited to
+ places where there is no defined base URI, such as dialog boxes and
+ off-line advertisements.
+
+<span class="h2"><h2><a name="section-5">5</a>. Reference Resolution</h2></span>
+
+ This section defines the process of resolving a URI reference within
+ a context that allows relative references so that the result is a
+ string matching the &lt;URI&gt; syntax rule of <a href="#section-3">Section 3</a>.
+
+<span class="h3"><h3><a name="section-5.1">5.1</a>. Establishing a Base URI</h3></span>
+
+ The term "relative" implies that a "base URI" exists against which
+ the relative reference is applied. Aside from fragment-only
+ references (<a href="#section-4.4">Section 4.4</a>), relative references are only usable when a
+ base URI is known. A base URI must be established by the parser
+ prior to parsing URI references that might be relative. A base URI
+ must conform to the &lt;absolute-URI&gt; syntax rule (<a href="#section-4.3">Section 4.3</a>). If the
+ base URI is obtained from a URI reference, then that reference must
+ be converted to absolute form and stripped of any fragment component
+ prior to its use as a base URI.
+
+
+
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 28]</span>
+<a name="page-29" id="page-29" href="#page-29"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+ The base URI of a reference can be established in one of four ways,
+ discussed below in order of precedence. The order of precedence can
+ be thought of in terms of layers, where the innermost defined base
+ URI has the highest precedence. This can be visualized graphically
+ as follows:
+
+ .----------------------------------------------------------.
+ | .----------------------------------------------------. |
+ | | .----------------------------------------------. | |
+ | | | .----------------------------------------. | | |
+ | | | | .----------------------------------. | | | |
+ | | | | | &lt;relative-reference&gt; | | | | |
+ | | | | `----------------------------------' | | | |
+ | | | | (5.1.1) Base URI embedded in content | | | |
+ | | | `----------------------------------------' | | |
+ | | | (5.1.2) Base URI of the encapsulating entity | | |
+ | | | (message, representation, or none) | | |
+ | | `----------------------------------------------' | |
+ | | (5.1.3) URI used to retrieve the entity | |
+ | `----------------------------------------------------' |
+ | (5.1.4) Default Base URI (application-dependent) |
+ `----------------------------------------------------------'
+
+<span class="h4"><h4><a name="section-5.1.1">5.1.1</a>. Base URI Embedded in Content</h4></span>
+
+ Within certain media types, a base URI for relative references can be
+ embedded within the content itself so that it can be readily obtained
+ by a parser. This can be useful for descriptive documents, such as
+ tables of contents, which may be transmitted to others through
+ protocols other than their usual retrieval context (e.g., email or
+ USENET news).
+
+ It is beyond the scope of this specification to specify how, for each
+ media type, a base URI can be embedded. The appropriate syntax, when
+ available, is described by the data format specification associated
+ with each media type.
+
+<span class="h4"><h4><a name="section-5.1.2">5.1.2</a>. Base URI from the Encapsulating Entity</h4></span>
+
+ If no base URI is embedded, the base URI is defined by the
+ representation's retrieval context. For a document that is enclosed
+ within another entity, such as a message or archive, the retrieval
+ context is that entity. Thus, the default base URI of a
+ representation is the base URI of the entity in which the
+ representation is encapsulated.
+
+
+
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 29]</span>
+<a name="page-30" id="page-30" href="#page-30"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+ A mechanism for embedding a base URI within MIME container types
+ (e.g., the message and multipart types) is defined by MHTML
+ [<a href="http://tools.ietf.org/html/rfc2557" title="&quot;MIME Encapsulation of Aggregate Documents, such as HTML (MHTML)&quot;">RFC2557</a>]. Protocols that do not use the MIME message header syntax,
+ but that do allow some form of tagged metadata to be included within
+ messages, may define their own syntax for defining a base URI as part
+ of a message.
+
+<span class="h4"><h4><a name="section-5.1.3">5.1.3</a>. Base URI from the Retrieval URI</h4></span>
+
+ If no base URI is embedded and the representation is not encapsulated
+ within some other entity, then, if a URI was used to retrieve the
+ representation, that URI shall be considered the base URI. Note that
+ if the retrieval was the result of a redirected request, the last URI
+ used (i.e., the URI that resulted in the actual retrieval of the
+ representation) is the base URI.
+
+<span class="h4"><h4><a name="section-5.1.4">5.1.4</a>. Default Base URI</h4></span>
+
+ If none of the conditions described above apply, then the base URI is
+ defined by the context of the application. As this definition is
+ necessarily application-dependent, failing to define a base URI by
+ using one of the other methods may result in the same content being
+ interpreted differently by different types of applications.
+
+ A sender of a representation containing relative references is
+ responsible for ensuring that a base URI for those references can be
+ established. Aside from fragment-only references, relative
+ references can only be used reliably in situations where the base URI
+ is well defined.
+
+<span class="h3"><h3><a name="section-5.2">5.2</a>. Relative Resolution</h3></span>
+
+ This section describes an algorithm for converting a URI reference
+ that might be relative to a given base URI into the parsed components
+ of the reference's target. The components can then be recomposed, as
+ described in <a href="#section-5.3">Section 5.3</a>, to form the target URI. This algorithm
+ provides definitive results that can be used to test the output of
+ other implementations. Applications may implement relative reference
+ resolution by using some other algorithm, provided that the results
+ match what would be given by this one.
+
+
+
+
+
+
+
+
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 30]</span>
+<a name="page-31" id="page-31" href="#page-31"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+<span class="h4"><h4><a name="section-5.2.1">5.2.1</a>. Pre-parse the Base URI</h4></span>
+
+ The base URI (Base) is established according to the procedure of
+ <a href="#section-5.1">Section 5.1</a> and parsed into the five main components described in
+ <a href="#section-3">Section 3</a>. Note that only the scheme component is required to be
+ present in a base URI; the other components may be empty or
+ undefined. A component is undefined if its associated delimiter does
+ not appear in the URI reference; the path component is never
+ undefined, though it may be empty.
+
+ Normalization of the base URI, as described in Sections 6.2.2 and
+ 6.2.3, is optional. A URI reference must be transformed to its
+ target URI before it can be normalized.
+
+<span class="h4"><h4><a name="section-5.2.2">5.2.2</a>. Transform References</h4></span>
+
+ For each URI reference (R), the following pseudocode describes an
+ algorithm for transforming R into its target URI (T):
+
+ -- The URI reference is parsed into the five URI components
+ --
+ (R.scheme, R.authority, R.path, R.query, R.fragment) = parse(R);
+
+ -- A non-strict parser may ignore a scheme in the reference
+ -- if it is identical to the base URI's scheme.
+ --
+ if ((not strict) and (R.scheme == Base.scheme)) then
+ undefine(R.scheme);
+ endif;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 31]</span>
+<a name="page-32" id="page-32" href="#page-32"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+ if defined(R.scheme) then
+ T.scheme = R.scheme;
+ T.authority = R.authority;
+ T.path = remove_dot_segments(R.path);
+ T.query = R.query;
+ else
+ if defined(R.authority) then
+ T.authority = R.authority;
+ T.path = remove_dot_segments(R.path);
+ T.query = R.query;
+ else
+ if (R.path == "") then
+ T.path = Base.path;
+ if defined(R.query) then
+ T.query = R.query;
+ else
+ T.query = Base.query;
+ endif;
+ else
+ if (R.path starts-with "/") then
+ T.path = remove_dot_segments(R.path);
+ else
+ T.path = merge(Base.path, R.path);
+ T.path = remove_dot_segments(T.path);
+ endif;
+ T.query = R.query;
+ endif;
+ T.authority = Base.authority;
+ endif;
+ T.scheme = Base.scheme;
+ endif;
+
+ T.fragment = R.fragment;
+
+<span class="h4"><h4><a name="section-5.2.3">5.2.3</a>. Merge Paths</h4></span>
+
+ The pseudocode above refers to a "merge" routine for merging a
+ relative-path reference with the path of the base URI. This is
+ accomplished as follows:
+
+ o If the base URI has a defined authority component and an empty
+ path, then return a string consisting of "/" concatenated with the
+ reference's path; otherwise,
+
+
+
+
+
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 32]</span>
+<a name="page-33" id="page-33" href="#page-33"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+ o return a string consisting of the reference's path component
+ appended to all but the last segment of the base URI's path (i.e.,
+ excluding any characters after the right-most "/" in the base URI
+ path, or excluding the entire base URI path if it does not contain
+ any "/" characters).
+
+<span class="h4"><h4><a name="section-5.2.4">5.2.4</a>. Remove Dot Segments</h4></span>
+
+ The pseudocode also refers to a "remove_dot_segments" routine for
+ interpreting and removing the special "." and ".." complete path
+ segments from a referenced path. This is done after the path is
+ extracted from a reference, whether or not the path was relative, in
+ order to remove any invalid or extraneous dot-segments prior to
+ forming the target URI. Although there are many ways to accomplish
+ this removal process, we describe a simple method using two string
+ buffers.
+
+ 1. The input buffer is initialized with the now-appended path
+ components and the output buffer is initialized to the empty
+ string.
+
+ 2. While the input buffer is not empty, loop as follows:
+
+ A. If the input buffer begins with a prefix of "../" or "./",
+ then remove that prefix from the input buffer; otherwise,
+
+ B. if the input buffer begins with a prefix of "/./" or "/.",
+ where "." is a complete path segment, then replace that
+ prefix with "/" in the input buffer; otherwise,
+
+ C. if the input buffer begins with a prefix of "/../" or "/..",
+ where ".." is a complete path segment, then replace that
+ prefix with "/" in the input buffer and remove the last
+ segment and its preceding "/" (if any) from the output
+ buffer; otherwise,
+
+ D. if the input buffer consists only of "." or "..", then remove
+ that from the input buffer; otherwise,
+
+ E. move the first path segment in the input buffer to the end of
+ the output buffer, including the initial "/" character (if
+ any) and any subsequent characters up to, but not including,
+ the next "/" character or the end of the input buffer.
+
+ 3. Finally, the output buffer is returned as the result of
+ remove_dot_segments.
+
+
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 33]</span>
+<a name="page-34" id="page-34" href="#page-34"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+ Note that dot-segments are intended for use in URI references to
+ express an identifier relative to the hierarchy of names in the base
+ URI. The remove_dot_segments algorithm respects that hierarchy by
+ removing extra dot-segments rather than treat them as an error or
+ leaving them to be misinterpreted by dereference implementations.
+
+ The following illustrates how the above steps are applied for two
+ examples of merged paths, showing the state of the two buffers after
+ each step.
+
+ STEP OUTPUT BUFFER INPUT BUFFER
+
+ 1 : /a/b/c/./../../g
+ 2E: /a /b/c/./../../g
+ 2E: /a/b /c/./../../g
+ 2E: /a/b/c /./../../g
+ 2B: /a/b/c /../../g
+ 2C: /a/b /../g
+ 2C: /a /g
+ 2E: /a/g
+
+ STEP OUTPUT BUFFER INPUT BUFFER
+
+ 1 : mid/content=5/../6
+ 2E: mid /content=5/../6
+ 2E: mid/content=5 /../6
+ 2C: mid /6
+ 2E: mid/6
+
+ Some applications may find it more efficient to implement the
+ remove_dot_segments algorithm by using two segment stacks rather than
+ strings.
+
+ Note: Beware that some older, erroneous implementations will fail
+ to separate a reference's query component from its path component
+ prior to merging the base and reference paths, resulting in an
+ interoperability failure if the query component contains the
+ strings "/../" or "/./".
+
+
+
+
+
+
+
+
+
+
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 34]</span>
+<a name="page-35" id="page-35" href="#page-35"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+<span class="h3"><h3><a name="section-5.3">5.3</a>. Component Recomposition</h3></span>
+
+ Parsed URI components can be recomposed to obtain the corresponding
+ URI reference string. Using pseudocode, this would be:
+
+ result = ""
+
+ if defined(scheme) then
+ append scheme to result;
+ append ":" to result;
+ endif;
+
+ if defined(authority) then
+ append "//" to result;
+ append authority to result;
+ endif;
+
+ append path to result;
+
+ if defined(query) then
+ append "?" to result;
+ append query to result;
+ endif;
+
+ if defined(fragment) then
+ append "#" to result;
+ append fragment to result;
+ endif;
+
+ return result;
+
+ Note that we are careful to preserve the distinction between a
+ component that is undefined, meaning that its separator was not
+ present in the reference, and a component that is empty, meaning that
+ the separator was present and was immediately followed by the next
+ component separator or the end of the reference.
+
+<span class="h3"><h3><a name="section-5.4">5.4</a>. Reference Resolution Examples</h3></span>
+
+ Within a representation with a well defined base URI of
+
+ http://a/b/c/d;p?q
+
+ a relative reference is transformed to its target URI as follows.
+
+
+
+
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 35]</span>
+<a name="page-36" id="page-36" href="#page-36"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+<span class="h4"><h4><a name="section-5.4.1">5.4.1</a>. Normal Examples</h4></span>
+
+ "g:h" = "g:h"
+ "g" = "<a href="http://a/b/c/g">http://a/b/c/g</a>"
+ "./g" = "<a href="http://a/b/c/g">http://a/b/c/g</a>"
+ "g/" = "<a href="http://a/b/c/g/">http://a/b/c/g/</a>"
+ "/g" = "<a href="http://a/g">http://a/g</a>"
+ "//g" = "http://g"
+ "?y" = "http://a/b/c/d;p?y"
+ "g?y" = "<a href="http://a/b/c/g?y">http://a/b/c/g?y</a>"
+ "#s" = "http://a/b/c/d;p?q#s"
+ "g#s" = "<a href="http://a/b/c/g#s">http://a/b/c/g#s</a>"
+ "g?y#s" = "<a href="http://a/b/c/g?y#s">http://a/b/c/g?y#s</a>"
+ ";x" = "http://a/b/c/;x"
+ "g;x" = "http://a/b/c/g;x"
+ "g;x?y#s" = "http://a/b/c/g;x?y#s"
+ "" = "http://a/b/c/d;p?q"
+ "." = "<a href="http://a/b/c/">http://a/b/c/</a>"
+ "./" = "<a href="http://a/b/c/">http://a/b/c/</a>"
+ ".." = "<a href="http://a/b/">http://a/b/</a>"
+ "../" = "<a href="http://a/b/">http://a/b/</a>"
+ "../g" = "<a href="http://a/b/g">http://a/b/g</a>"
+ "../.." = "<a href="http://a/">http://a/</a>"
+ "../../" = "<a href="http://a/">http://a/</a>"
+ "../../g" = "<a href="http://a/g">http://a/g</a>"
+
+<span class="h4"><h4><a name="section-5.4.2">5.4.2</a>. Abnormal Examples</h4></span>
+
+ Although the following abnormal examples are unlikely to occur in
+ normal practice, all URI parsers should be capable of resolving them
+ consistently. Each example uses the same base as that above.
+
+ Parsers must be careful in handling cases where there are more ".."
+ segments in a relative-path reference than there are hierarchical
+ levels in the base URI's path. Note that the ".." syntax cannot be
+ used to change the authority component of a URI.
+
+ "../../../g" = "<a href="http://a/g">http://a/g</a>"
+ "../../../../g" = "<a href="http://a/g">http://a/g</a>"
+
+
+
+
+
+
+
+
+
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 36]</span>
+<a name="page-37" id="page-37" href="#page-37"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+ Similarly, parsers must remove the dot-segments "." and ".." when
+ they are complete components of a path, but not when they are only
+ part of a segment.
+
+ "/./g" = "<a href="http://a/g">http://a/g</a>"
+ "/../g" = "<a href="http://a/g">http://a/g</a>"
+ "g." = "<a href="http://a/b/c/g.">http://a/b/c/g.</a>"
+ ".g" = "<a href="http://a/b/c/.g">http://a/b/c/.g</a>"
+ "g.." = "<a href="http://a/b/c/g..">http://a/b/c/g..</a>"
+ "..g" = "<a href="http://a/b/c/..g">http://a/b/c/..g</a>"
+
+ Less likely are cases where the relative reference uses unnecessary
+ or nonsensical forms of the "." and ".." complete path segments.
+
+ "./../g" = "<a href="http://a/b/g">http://a/b/g</a>"
+ "./g/." = "<a href="http://a/b/c/g/">http://a/b/c/g/</a>"
+ "g/./h" = "<a href="http://a/b/c/g/h">http://a/b/c/g/h</a>"
+ "g/../h" = "<a href="http://a/b/c/h">http://a/b/c/h</a>"
+ "g;x=1/./y" = "http://a/b/c/g;x=1/y"
+ "g;x=1/../y" = "<a href="http://a/b/c/y">http://a/b/c/y</a>"
+
+ Some applications fail to separate the reference's query and/or
+ fragment components from the path component before merging it with
+ the base path and removing dot-segments. This error is rarely
+ noticed, as typical usage of a fragment never includes the hierarchy
+ ("/") character and the query component is not normally used within
+ relative references.
+
+ "g?y/./x" = "<a href="http://a/b/c/g?y/./x">http://a/b/c/g?y/./x</a>"
+ "g?y/../x" = "<a href="http://a/b/c/g?y/../x">http://a/b/c/g?y/../x</a>"
+ "g#s/./x" = "<a href="http://a/b/c/g#s/./x">http://a/b/c/g#s/./x</a>"
+ "g#s/../x" = "<a href="http://a/b/c/g#s/../x">http://a/b/c/g#s/../x</a>"
+
+ Some parsers allow the scheme name to be present in a relative
+ reference if it is the same as the base URI scheme. This is
+ considered to be a loophole in prior specifications of partial URI
+ [<a href="http://tools.ietf.org/html/rfc1630" title="&quot;Universal Resource Identifiers in WWW: A Unifying Syntax for the Expression of Names and Addresses of Objects on the Network as used in the World-Wide Web&quot;">RFC1630</a>]. Its use should be avoided but is allowed for backward
+ compatibility.
+
+ "http:g" = "http:g" ; for strict parsers
+ / "<a href="http://a/b/c/g">http://a/b/c/g</a>" ; for backward compatibility
+
+
+
+
+
+
+
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 37]</span>
+<a name="page-38" id="page-38" href="#page-38"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+<span class="h2"><h2><a name="section-6">6</a>. Normalization and Comparison</h2></span>
+
+ One of the most common operations on URIs is simple comparison:
+ determining whether two URIs are equivalent without using the URIs to
+ access their respective resource(s). A comparison is performed every
+ time a response cache is accessed, a browser checks its history to
+ color a link, or an XML parser processes tags within a namespace.
+ Extensive normalization prior to comparison of URIs is often used by
+ spiders and indexing engines to prune a search space or to reduce
+ duplication of request actions and response storage.
+
+ URI comparison is performed for some particular purpose. Protocols
+ or implementations that compare URIs for different purposes will
+ often be subject to differing design trade-offs in regards to how
+ much effort should be spent in reducing aliased identifiers. This
+ section describes various methods that may be used to compare URIs,
+ the trade-offs between them, and the types of applications that might
+ use them.
+
+<span class="h3"><h3><a name="section-6.1">6.1</a>. Equivalence</h3></span>
+
+ Because URIs exist to identify resources, presumably they should be
+ considered equivalent when they identify the same resource. However,
+ this definition of equivalence is not of much practical use, as there
+ is no way for an implementation to compare two resources unless it
+ has full knowledge or control of them. For this reason,
+ determination of equivalence or difference of URIs is based on string
+ comparison, perhaps augmented by reference to additional rules
+ provided by URI scheme definitions. We use the terms "different" and
+ "equivalent" to describe the possible outcomes of such comparisons,
+ but there are many application-dependent versions of equivalence.
+
+ Even though it is possible to determine that two URIs are equivalent,
+ URI comparison is not sufficient to determine whether two URIs
+ identify different resources. For example, an owner of two different
+ domain names could decide to serve the same resource from both,
+ resulting in two different URIs. Therefore, comparison methods are
+ designed to minimize false negatives while strictly avoiding false
+ positives.
+
+ In testing for equivalence, applications should not directly compare
+ relative references; the references should be converted to their
+ respective target URIs before comparison. When URIs are compared to
+ select (or avoid) a network action, such as retrieval of a
+ representation, fragment components (if any) should be excluded from
+ the comparison.
+
+
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 38]</span>
+<a name="page-39" id="page-39" href="#page-39"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+<span class="h3"><h3><a name="section-6.2">6.2</a>. Comparison Ladder</h3></span>
+
+ A variety of methods are used in practice to test URI equivalence.
+ These methods fall into a range, distinguished by the amount of
+ processing required and the degree to which the probability of false
+ negatives is reduced. As noted above, false negatives cannot be
+ eliminated. In practice, their probability can be reduced, but this
+ reduction requires more processing and is not cost-effective for all
+ applications.
+
+ If this range of comparison practices is considered as a ladder, the
+ following discussion will climb the ladder, starting with practices
+ that are cheap but have a relatively higher chance of producing false
+ negatives, and proceeding to those that have higher computational
+ cost and lower risk of false negatives.
+
+<span class="h4"><h4><a name="section-6.2.1">6.2.1</a>. Simple String Comparison</h4></span>
+
+ If two URIs, when considered as character strings, are identical,
+ then it is safe to conclude that they are equivalent. This type of
+ equivalence test has very low computational cost and is in wide use
+ in a variety of applications, particularly in the domain of parsing.
+
+ Testing strings for equivalence requires some basic precautions.
+ This procedure is often referred to as "bit-for-bit" or
+ "byte-for-byte" comparison, which is potentially misleading. Testing
+ strings for equality is normally based on pair comparison of the
+ characters that make up the strings, starting from the first and
+ proceeding until both strings are exhausted and all characters are
+ found to be equal, until a pair of characters compares unequal, or
+ until one of the strings is exhausted before the other.
+
+ This character comparison requires that each pair of characters be
+ put in comparable form. For example, should one URI be stored in a
+ byte array in EBCDIC encoding and the second in a Java String object
+ (UTF-16), bit-for-bit comparisons applied naively will produce
+ errors. It is better to speak of equality on a character-for-
+ character basis rather than on a byte-for-byte or bit-for-bit basis.
+ In practical terms, character-by-character comparisons should be done
+ codepoint-by-codepoint after conversion to a common character
+ encoding.
+
+ False negatives are caused by the production and use of URI aliases.
+ Unnecessary aliases can be reduced, regardless of the comparison
+ method, by consistently providing URI references in an already-
+ normalized form (i.e., a form identical to what would be produced
+ after normalization is applied, as described below).
+
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 39]</span>
+<a name="page-40" id="page-40" href="#page-40"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+ Protocols and data formats often limit some URI comparisons to simple
+ string comparison, based on the theory that people and
+ implementations will, in their own best interest, be consistent in
+ providing URI references, or at least consistent enough to negate any
+ efficiency that might be obtained from further normalization.
+
+<span class="h4"><h4><a name="section-6.2.2">6.2.2</a>. Syntax-Based Normalization</h4></span>
+
+ Implementations may use logic based on the definitions provided by
+ this specification to reduce the probability of false negatives.
+ This processing is moderately higher in cost than character-for-
+ character string comparison. For example, an application using this
+ approach could reasonably consider the following two URIs equivalent:
+
+ example://a/b/c/%7Bfoo%7D
+ eXAMPLE://a/./b/../b/%63/%7bfoo%7d
+
+ Web user agents, such as browsers, typically apply this type of URI
+ normalization when determining whether a cached response is
+ available. Syntax-based normalization includes such techniques as
+ case normalization, percent-encoding normalization, and removal of
+ dot-segments.
+
+<span class="h5"><h5><a name="section-6.2.2.1">6.2.2.1</a>. Case Normalization</h5></span>
+
+ For all URIs, the hexadecimal digits within a percent-encoding
+ triplet (e.g., "%3a" versus "%3A") are case-insensitive and therefore
+ should be normalized to use uppercase letters for the digits A-F.
+
+ When a URI uses components of the generic syntax, the component
+ syntax equivalence rules always apply; namely, that the scheme and
+ host are case-insensitive and therefore should be normalized to
+ lowercase. For example, the URI &lt;HTTP://www.EXAMPLE.com/&gt; is
+ equivalent to &lt;http://www.example.com/&gt;. The other generic syntax
+ components are assumed to be case-sensitive unless specifically
+ defined otherwise by the scheme (see <a href="#section-6.2.3">Section 6.2.3</a>).
+
+<span class="h5"><h5><a name="section-6.2.2.2">6.2.2.2</a>. Percent-Encoding Normalization</h5></span>
+
+ The percent-encoding mechanism (<a href="#section-2.1">Section 2.1</a>) is a frequent source of
+ variance among otherwise identical URIs. In addition to the case
+ normalization issue noted above, some URI producers percent-encode
+ octets that do not require percent-encoding, resulting in URIs that
+ are equivalent to their non-encoded counterparts. These URIs should
+ be normalized by decoding any percent-encoded octet that corresponds
+ to an unreserved character, as described in <a href="#section-2.3">Section 2.3</a>.
+
+
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 40]</span>
+<a name="page-41" id="page-41" href="#page-41"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+<span class="h5"><h5><a name="section-6.2.2.3">6.2.2.3</a>. Path Segment Normalization</h5></span>
+
+ The complete path segments "." and ".." are intended only for use
+ within relative references (<a href="#section-4.1">Section 4.1</a>) and are removed as part of
+ the reference resolution process (<a href="#section-5.2">Section 5.2</a>). However, some
+ deployed implementations incorrectly assume that reference resolution
+ is not necessary when the reference is already a URI and thus fail to
+ remove dot-segments when they occur in non-relative paths. URI
+ normalizers should remove dot-segments by applying the
+ remove_dot_segments algorithm to the path, as described in
+ <a href="#section-5.2.4">Section 5.2.4</a>.
+
+<span class="h4"><h4><a name="section-6.2.3">6.2.3</a>. Scheme-Based Normalization</h4></span>
+
+ The syntax and semantics of URIs vary from scheme to scheme, as
+ described by the defining specification for each scheme.
+ Implementations may use scheme-specific rules, at further processing
+ cost, to reduce the probability of false negatives. For example,
+ because the "http" scheme makes use of an authority component, has a
+ default port of "80", and defines an empty path to be equivalent to
+ "/", the following four URIs are equivalent:
+
+ http://example.com
+ http://example.com/
+ <a href="http://example.com:/">http://example.com:/</a>
+ <a href="http://example.com:80/">http://example.com:80/</a>
+
+ In general, a URI that uses the generic syntax for authority with an
+ empty path should be normalized to a path of "/". Likewise, an
+ explicit ":port", for which the port is empty or the default for the
+ scheme, is equivalent to one where the port and its ":" delimiter are
+ elided and thus should be removed by scheme-based normalization. For
+ example, the second URI above is the normal form for the "http"
+ scheme.
+
+ Another case where normalization varies by scheme is in the handling
+ of an empty authority component or empty host subcomponent. For many
+ scheme specifications, an empty authority or host is considered an
+ error; for others, it is considered equivalent to "localhost" or the
+ end-user's host. When a scheme defines a default for authority and a
+ URI reference to that default is desired, the reference should be
+ normalized to an empty authority for the sake of uniformity, brevity,
+ and internationalization. If, however, either the userinfo or port
+ subcomponents are non-empty, then the host should be given explicitly
+ even if it matches the default.
+
+ Normalization should not remove delimiters when their associated
+ component is empty unless licensed to do so by the scheme
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 41]</span>
+<a name="page-42" id="page-42" href="#page-42"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+ specification. For example, the URI "http://example.com/?" cannot be
+ assumed to be equivalent to any of the examples above. Likewise, the
+ presence or absence of delimiters within a userinfo subcomponent is
+ usually significant to its interpretation. The fragment component is
+ not subject to any scheme-based normalization; thus, two URIs that
+ differ only by the suffix "#" are considered different regardless of
+ the scheme.
+
+ Some schemes define additional subcomponents that consist of case-
+ insensitive data, giving an implicit license to normalizers to
+ convert this data to a common case (e.g., all lowercase). For
+ example, URI schemes that define a subcomponent of path to contain an
+ Internet hostname, such as the "mailto" URI scheme, cause that
+ subcomponent to be case-insensitive and thus subject to case
+ normalization (e.g., "mailto:Joe@Example.COM" is equivalent to
+ "mailto:Joe@example.com", even though the generic syntax considers
+ the path component to be case-sensitive).
+
+ Other scheme-specific normalizations are possible.
+
+<span class="h4"><h4><a name="section-6.2.4">6.2.4</a>. Protocol-Based Normalization</h4></span>
+
+ Substantial effort to reduce the incidence of false negatives is
+ often cost-effective for web spiders. Therefore, they implement even
+ more aggressive techniques in URI comparison. For example, if they
+ observe that a URI such as
+
+ http://example.com/data
+
+ redirects to a URI differing only in the trailing slash
+
+ http://example.com/data/
+
+ they will likely regard the two as equivalent in the future. This
+ kind of technique is only appropriate when equivalence is clearly
+ indicated by both the result of accessing the resources and the
+ common conventions of their scheme's dereference algorithm (in this
+ case, use of redirection by HTTP origin servers to avoid problems
+ with relative references).
+
+
+
+
+
+
+
+
+
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 42]</span>
+<a name="page-43" id="page-43" href="#page-43"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+<span class="h2"><h2><a name="section-7">7</a>. Security Considerations</h2></span>
+
+ A URI does not in itself pose a security threat. However, as URIs
+ are often used to provide a compact set of instructions for access to
+ network resources, care must be taken to properly interpret the data
+ within a URI, to prevent that data from causing unintended access,
+ and to avoid including data that should not be revealed in plain
+ text.
+
+<span class="h3"><h3><a name="section-7.1">7.1</a>. Reliability and Consistency</h3></span>
+
+ There is no guarantee that once a URI has been used to retrieve
+ information, the same information will be retrievable by that URI in
+ the future. Nor is there any guarantee that the information
+ retrievable via that URI in the future will be observably similar to
+ that retrieved in the past. The URI syntax does not constrain how a
+ given scheme or authority apportions its namespace or maintains it
+ over time. Such guarantees can only be obtained from the person(s)
+ controlling that namespace and the resource in question. A specific
+ URI scheme may define additional semantics, such as name persistence,
+ if those semantics are required of all naming authorities for that
+ scheme.
+
+<span class="h3"><h3><a name="section-7.2">7.2</a>. Malicious Construction</h3></span>
+
+ It is sometimes possible to construct a URI so that an attempt to
+ perform a seemingly harmless, idempotent operation, such as the
+ retrieval of a representation, will in fact cause a possibly damaging
+ remote operation. The unsafe URI is typically constructed by
+ specifying a port number other than that reserved for the network
+ protocol in question. The client unwittingly contacts a site running
+ a different protocol service, and data within the URI contains
+ instructions that, when interpreted according to this other protocol,
+ cause an unexpected operation. A frequent example of such abuse has
+ been the use of a protocol-based scheme with a port component of
+ "25", thereby fooling user agent software into sending an unintended
+ or impersonating message via an SMTP server.
+
+ Applications should prevent dereference of a URI that specifies a TCP
+ port number within the "well-known port" range (0 - 1023) unless the
+ protocol being used to dereference that URI is compatible with the
+ protocol expected on that well-known port. Although IANA maintains a
+ registry of well-known ports, applications should make such
+ restrictions user-configurable to avoid preventing the deployment of
+ new services.
+
+
+
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 43]</span>
+<a name="page-44" id="page-44" href="#page-44"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+ When a URI contains percent-encoded octets that match the delimiters
+ for a given resolution or dereference protocol (for example, CR and
+ LF characters for the TELNET protocol), these percent-encodings must
+ not be decoded before transmission across that protocol. Transfer of
+ the percent-encoding, which might violate the protocol, is less
+ harmful than allowing decoded octets to be interpreted as additional
+ operations or parameters, perhaps triggering an unexpected and
+ possibly harmful remote operation.
+
+<span class="h3"><h3><a name="section-7.3">7.3</a>. Back-End Transcoding</h3></span>
+
+ When a URI is dereferenced, the data within it is often parsed by
+ both the user agent and one or more servers. In HTTP, for example, a
+ typical user agent will parse a URI into its five major components,
+ access the authority's server, and send it the data within the
+ authority, path, and query components. A typical server will take
+ that information, parse the path into segments and the query into
+ key/value pairs, and then invoke implementation-specific handlers to
+ respond to the request. As a result, a common security concern for
+ server implementations that handle a URI, either as a whole or split
+ into separate components, is proper interpretation of the octet data
+ represented by the characters and percent-encodings within that URI.
+
+ Percent-encoded octets must be decoded at some point during the
+ dereference process. Applications must split the URI into its
+ components and subcomponents prior to decoding the octets, as
+ otherwise the decoded octets might be mistaken for delimiters.
+ Security checks of the data within a URI should be applied after
+ decoding the octets. Note, however, that the "%00" percent-encoding
+ (NUL) may require special handling and should be rejected if the
+ application is not expecting to receive raw data within a component.
+
+ Special care should be taken when the URI path interpretation process
+ involves the use of a back-end file system or related system
+ functions. File systems typically assign an operational meaning to
+ special characters, such as the "/", "\", ":", "[", and "]"
+ characters, and to special device names like ".", "..", "...", "aux",
+ "lpt", etc. In some cases, merely testing for the existence of such
+ a name will cause the operating system to pause or invoke unrelated
+ system calls, leading to significant security concerns regarding
+ denial of service and unintended data transfer. It would be
+ impossible for this specification to list all such significant
+ characters and device names. Implementers should research the
+ reserved names and characters for the types of storage device that
+ may be attached to their applications and restrict the use of data
+ obtained from URI components accordingly.
+
+
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 44]</span>
+<a name="page-45" id="page-45" href="#page-45"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+<span class="h3"><h3><a name="section-7.4">7.4</a>. Rare IP Address Formats</h3></span>
+
+ Although the URI syntax for IPv4address only allows the common
+ dotted-decimal form of IPv4 address literal, many implementations
+ that process URIs make use of platform-dependent system routines,
+ such as gethostbyname() and inet_aton(), to translate the string
+ literal to an actual IP address. Unfortunately, such system routines
+ often allow and process a much larger set of formats than those
+ described in <a href="#section-3.2.2">Section 3.2.2</a>.
+
+ For example, many implementations allow dotted forms of three
+ numbers, wherein the last part is interpreted as a 16-bit quantity
+ and placed in the right-most two bytes of the network address (e.g.,
+ a Class B network). Likewise, a dotted form of two numbers means
+ that the last part is interpreted as a 24-bit quantity and placed in
+ the right-most three bytes of the network address (Class A), and a
+ single number (without dots) is interpreted as a 32-bit quantity and
+ stored directly in the network address. Adding further to the
+ confusion, some implementations allow each dotted part to be
+ interpreted as decimal, octal, or hexadecimal, as specified in the C
+ language (i.e., a leading 0x or 0X implies hexadecimal; a leading 0
+ implies octal; otherwise, the number is interpreted as decimal).
+
+ These additional IP address formats are not allowed in the URI syntax
+ due to differences between platform implementations. However, they
+ can become a security concern if an application attempts to filter
+ access to resources based on the IP address in string literal format.
+ If this filtering is performed, literals should be converted to
+ numeric form and filtered based on the numeric value, and not on a
+ prefix or suffix of the string form.
+
+<span class="h3"><h3><a name="section-7.5">7.5</a>. Sensitive Information</h3></span>
+
+ URI producers should not provide a URI that contains a username or
+ password that is intended to be secret. URIs are frequently
+ displayed by browsers, stored in clear text bookmarks, and logged by
+ user agent history and intermediary applications (proxies). A
+ password appearing within the userinfo component is deprecated and
+ should be considered an error (or simply ignored) except in those
+ rare cases where the 'password' parameter is intended to be public.
+
+<span class="h3"><h3><a name="section-7.6">7.6</a>. Semantic Attacks</h3></span>
+
+ Because the userinfo subcomponent is rarely used and appears before
+ the host in the authority component, it can be used to construct a
+ URI intended to mislead a human user by appearing to identify one
+ (trusted) naming authority while actually identifying a different
+ authority hidden behind the noise. For example
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 45]</span>
+<a name="page-46" id="page-46" href="#page-46"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+ ftp://cnn.example.com&amp;story=breaking_news@10.0.0.1/top_story.htm
+
+ might lead a human user to assume that the host is 'cnn.example.com',
+ whereas it is actually '10.0.0.1'. Note that a misleading userinfo
+ subcomponent could be much longer than the example above.
+
+ A misleading URI, such as that above, is an attack on the user's
+ preconceived notions about the meaning of a URI rather than an attack
+ on the software itself. User agents may be able to reduce the impact
+ of such attacks by distinguishing the various components of the URI
+ when they are rendered, such as by using a different color or tone to
+ render userinfo if any is present, though there is no panacea. More
+ information on URI-based semantic attacks can be found in [<a href="#ref-Siedzik" title="&quot;Semantic Attacks: What&#39;s in a URL?&quot;">Siedzik</a>].
+
+<span class="h2"><h2><a name="section-8">8</a>. IANA Considerations</h2></span>
+
+ URI scheme names, as defined by &lt;scheme&gt; in <a href="#section-3.1">Section 3.1</a>, form a
+ registered namespace that is managed by IANA according to the
+ procedures defined in [<a href="#ref-BCP35" title="&quot;Registration Procedures for URL Scheme Names&quot;">BCP35</a>]. No IANA actions are required by this
+ document.
+
+<span class="h2"><h2><a name="section-9">9</a>. Acknowledgements</h2></span>
+
+ This specification is derived from <a href="http://tools.ietf.org/html/rfc2396">RFC 2396</a> [<a href="http://tools.ietf.org/html/rfc2396" title="&quot;Uniform Resource Identifiers (URI): Generic Syntax&quot;">RFC2396</a>], <a href="http://tools.ietf.org/html/rfc1808">RFC 1808</a>
+ [<a href="http://tools.ietf.org/html/rfc1808" title="&quot;Relative Uniform Resource Locators&quot;">RFC1808</a>], and <a href="http://tools.ietf.org/html/rfc1738">RFC 1738</a> [<a href="http://tools.ietf.org/html/rfc1738" title="&quot;Uniform Resource Locators (URL)&quot;">RFC1738</a>]; the acknowledgements in those
+ documents still apply. It also incorporates the update (with
+ corrections) for IPv6 literals in the host syntax, as defined by
+ Robert M. Hinden, Brian E. Carpenter, and Larry Masinter in
+ [<a href="http://tools.ietf.org/html/rfc2732" title="&quot;Format for Literal IPv6 Addresses in URL&#39;s&quot;">RFC2732</a>]. In addition, contributions by Gisle Aas, Reese Anschultz,
+ Daniel Barclay, Tim Bray, Mike Brown, Rob Cameron, Jeremy Carroll,
+ Dan Connolly, Adam M. Costello, John Cowan, Jason Diamond, Martin
+ Duerst, Stefan Eissing, Clive D.W. Feather, Al Gilman, Tony Hammond,
+ Elliotte Harold, Pat Hayes, Henry Holtzman, Ian B. Jacobs, Michael
+ Kay, John C. Klensin, Graham Klyne, Dan Kohn, Bruce Lilly, Andrew
+ Main, Dave McAlpin, Ira McDonald, Michael Mealling, Ray Merkert,
+ Stephen Pollei, Julian Reschke, Tomas Rokicki, Miles Sabin, Kai
+ Schaetzl, Mark Thomson, Ronald Tschalaer, Norm Walsh, Marc Warne,
+ Stuart Williams, and Henry Zongaro are gratefully acknowledged.
+
+<span class="h2"><h2><a name="section-10">10</a>. References</h2></span>
+
+<span class="h3"><h3><a name="section-10.1">10.1</a>. Normative References</h3></span>
+
+ [<a name="ref-ASCII" id="ref-ASCII">ASCII</a>] American National Standards Institute, "Coded Character
+ Set -- 7-bit American Standard Code for Information
+ Interchange", ANSI X3.4, 1986.
+
+
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 46]</span>
+<a name="page-47" id="page-47" href="#page-47"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+ [<a name="ref-RFC2234" id="ref-RFC2234">RFC2234</a>] Crocker, D. and P. Overell, "Augmented BNF for Syntax
+ Specifications: ABNF", <a href="http://tools.ietf.org/html/rfc2234">RFC 2234</a>, November 1997.
+
+ [<a name="ref-STD63" id="ref-STD63">STD63</a>] Yergeau, F., "UTF-8, a transformation format of
+ ISO 10646", STD 63, <a href="http://tools.ietf.org/html/rfc3629">RFC 3629</a>, November 2003.
+
+ [<a name="ref-UCS" id="ref-UCS">UCS</a>] International Organization for Standardization,
+ "Information Technology - Universal Multiple-Octet Coded
+ Character Set (UCS)", ISO/IEC 10646:2003, December 2003.
+
+<span class="h3"><h3><a name="section-10.2">10.2</a>. Informative References</h3></span>
+
+ [<a name="ref-BCP19" id="ref-BCP19">BCP19</a>] Freed, N. and J. Postel, "IANA Charset Registration
+ Procedures", <a href="http://tools.ietf.org/html/bcp19">BCP 19</a>, <a href="http://tools.ietf.org/html/rfc2978">RFC 2978</a>, October 2000.
+
+ [<a name="ref-BCP35" id="ref-BCP35">BCP35</a>] Petke, R. and I. King, "Registration Procedures for URL
+ Scheme Names", <a href="http://tools.ietf.org/html/bcp35">BCP 35</a>, <a href="http://tools.ietf.org/html/rfc2717">RFC 2717</a>, November 1999.
+
+ [<a name="ref-RFC0952" id="ref-RFC0952">RFC0952</a>] Harrenstien, K., Stahl, M., and E. Feinler, "DoD Internet
+ host table specification", <a href="http://tools.ietf.org/html/rfc952">RFC 952</a>, October 1985.
+
+ [<a name="ref-RFC1034" id="ref-RFC1034">RFC1034</a>] Mockapetris, P., "Domain names - concepts and facilities",
+ STD 13, <a href="http://tools.ietf.org/html/rfc1034">RFC 1034</a>, November 1987.
+
+ [<a name="ref-RFC1123" id="ref-RFC1123">RFC1123</a>] Braden, R., "Requirements for Internet Hosts - Application
+ and Support", STD 3, <a href="http://tools.ietf.org/html/rfc1123">RFC 1123</a>, October 1989.
+
+ [<a name="ref-RFC1535" id="ref-RFC1535">RFC1535</a>] Gavron, E., "A Security Problem and Proposed Correction
+ With Widely Deployed DNS Software", <a href="http://tools.ietf.org/html/rfc1535">RFC 1535</a>,
+ October 1993.
+
+ [<a name="ref-RFC1630" id="ref-RFC1630">RFC1630</a>] Berners-Lee, T., "Universal Resource Identifiers in WWW: A
+ Unifying Syntax for the Expression of Names and Addresses
+ of Objects on the Network as used in the World-Wide Web",
+ <a href="http://tools.ietf.org/html/rfc1630">RFC 1630</a>, June 1994.
+
+ [<a name="ref-RFC1736" id="ref-RFC1736">RFC1736</a>] Kunze, J., "Functional Recommendations for Internet
+ Resource Locators", <a href="http://tools.ietf.org/html/rfc1736">RFC 1736</a>, February 1995.
+
+ [<a name="ref-RFC1737" id="ref-RFC1737">RFC1737</a>] Sollins, K. and L. Masinter, "Functional Requirements for
+ Uniform Resource Names", <a href="http://tools.ietf.org/html/rfc1737">RFC 1737</a>, December 1994.
+
+ [<a name="ref-RFC1738" id="ref-RFC1738">RFC1738</a>] Berners-Lee, T., Masinter, L., and M. McCahill, "Uniform
+ Resource Locators (URL)", <a href="http://tools.ietf.org/html/rfc1738">RFC 1738</a>, December 1994.
+
+ [<a name="ref-RFC1808" id="ref-RFC1808">RFC1808</a>] Fielding, R., "Relative Uniform Resource Locators",
+ <a href="http://tools.ietf.org/html/rfc1808">RFC 1808</a>, June 1995.
+
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 47]</span>
+<a name="page-48" id="page-48" href="#page-48"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+ [<a name="ref-RFC2046" id="ref-RFC2046">RFC2046</a>] Freed, N. and N. Borenstein, "Multipurpose Internet Mail
+ Extensions (MIME) Part Two: Media Types", <a href="http://tools.ietf.org/html/rfc2046">RFC 2046</a>,
+ November 1996.
+
+ [<a name="ref-RFC2141" id="ref-RFC2141">RFC2141</a>] Moats, R., "URN Syntax", <a href="http://tools.ietf.org/html/rfc2141">RFC 2141</a>, May 1997.
+
+ [<a name="ref-RFC2396" id="ref-RFC2396">RFC2396</a>] Berners-Lee, T., Fielding, R., and L. Masinter, "Uniform
+ Resource Identifiers (URI): Generic Syntax", <a href="http://tools.ietf.org/html/rfc2396">RFC 2396</a>,
+ August 1998.
+
+ [<a name="ref-RFC2518" id="ref-RFC2518">RFC2518</a>] Goland, Y., Whitehead, E., Faizi, A., Carter, S., and D.
+ Jensen, "HTTP Extensions for Distributed Authoring --
+ WEBDAV", <a href="http://tools.ietf.org/html/rfc2518">RFC 2518</a>, February 1999.
+
+ [<a name="ref-RFC2557" id="ref-RFC2557">RFC2557</a>] Palme, J., Hopmann, A., and N. Shelness, "MIME
+ Encapsulation of Aggregate Documents, such as HTML
+ (MHTML)", <a href="http://tools.ietf.org/html/rfc2557">RFC 2557</a>, March 1999.
+
+ [<a name="ref-RFC2718" id="ref-RFC2718">RFC2718</a>] Masinter, L., Alvestrand, H., Zigmond, D., and R. Petke,
+ "Guidelines for new URL Schemes", <a href="http://tools.ietf.org/html/rfc2718">RFC 2718</a>, November 1999.
+
+ [<a name="ref-RFC2732" id="ref-RFC2732">RFC2732</a>] Hinden, R., Carpenter, B., and L. Masinter, "Format for
+ Literal IPv6 Addresses in URL's", <a href="http://tools.ietf.org/html/rfc2732">RFC 2732</a>, December 1999.
+
+ [<a name="ref-RFC3305" id="ref-RFC3305">RFC3305</a>] Mealling, M. and R. Denenberg, "Report from the Joint
+ W3C/IETF URI Planning Interest Group: Uniform Resource
+ Identifiers (URIs), URLs, and Uniform Resource Names
+ (URNs): Clarifications and Recommendations", <a href="http://tools.ietf.org/html/rfc3305">RFC 3305</a>,
+ August 2002.
+
+ [<a name="ref-RFC3490" id="ref-RFC3490">RFC3490</a>] Faltstrom, P., Hoffman, P., and A. Costello,
+ "Internationalizing Domain Names in Applications (IDNA)",
+ <a href="http://tools.ietf.org/html/rfc3490">RFC 3490</a>, March 2003.
+
+ [<a name="ref-RFC3513" id="ref-RFC3513">RFC3513</a>] Hinden, R. and S. Deering, "Internet Protocol Version 6
+ (IPv6) Addressing Architecture", <a href="http://tools.ietf.org/html/rfc3513">RFC 3513</a>, April 2003.
+
+ [<a name="ref-Siedzik" id="ref-Siedzik">Siedzik</a>] Siedzik, R., "Semantic Attacks: What's in a URL?",
+ April 2001, &lt;<a href="http://www.giac.org/practical/gsec/Richard_Siedzik_GSEC.pdf">http://www.giac.org/practical/gsec/</a>
+ <a href="http://www.giac.org/practical/gsec/Richard_Siedzik_GSEC.pdf">Richard_Siedzik_GSEC.pdf</a>&gt;.
+
+
+
+
+
+
+
+
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 48]</span>
+<a name="page-49" id="page-49" href="#page-49"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+Appendix A. Collected ABNF for URI
+
+ URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
+
+ hier-part = "//" authority path-abempty
+ / path-absolute
+ / path-rootless
+ / path-empty
+
+ URI-reference = URI / relative-ref
+
+ absolute-URI = scheme ":" hier-part [ "?" query ]
+
+ relative-ref = relative-part [ "?" query ] [ "#" fragment ]
+
+ relative-part = "//" authority path-abempty
+ / path-absolute
+ / path-noscheme
+ / path-empty
+
+ scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+
+ authority = [ userinfo "@" ] host [ ":" port ]
+ userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
+ host = IP-literal / IPv4address / reg-name
+ port = *DIGIT
+
+ IP-literal = "[" ( IPv6address / IPvFuture ) "]"
+
+ IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
+
+ IPv6address = 6( h16 ":" ) ls32
+ / "::" 5( h16 ":" ) ls32
+ / [ h16 ] "::" 4( h16 ":" ) ls32
+ / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
+ / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
+ / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
+ / [ *4( h16 ":" ) h16 ] "::" ls32
+ / [ *5( h16 ":" ) h16 ] "::" h16
+ / [ *6( h16 ":" ) h16 ] "::"
+
+ h16 = 1*4HEXDIG
+ ls32 = ( h16 ":" h16 ) / IPv4address
+ IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
+
+
+
+
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 49]</span>
+<a name="page-50" id="page-50" href="#page-50"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+ dec-octet = DIGIT ; 0-9
+ / %x31-39 DIGIT ; 10-99
+ / "1" 2DIGIT ; 100-199
+ / "2" %x30-34 DIGIT ; 200-249
+ / "25" %x30-35 ; 250-255
+
+ reg-name = *( unreserved / pct-encoded / sub-delims )
+
+ path = path-abempty ; begins with "/" or is empty
+ / path-absolute ; begins with "/" but not "//"
+ / path-noscheme ; begins with a non-colon segment
+ / path-rootless ; begins with a segment
+ / path-empty ; zero characters
+
+ path-abempty = *( "/" segment )
+ path-absolute = "/" [ segment-nz *( "/" segment ) ]
+ path-noscheme = segment-nz-nc *( "/" segment )
+ path-rootless = segment-nz *( "/" segment )
+ path-empty = 0&lt;pchar&gt;
+
+ segment = *pchar
+ segment-nz = 1*pchar
+ segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
+ ; non-zero-length segment without any colon ":"
+
+ pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
+
+ query = *( pchar / "/" / "?" )
+
+ fragment = *( pchar / "/" / "?" )
+
+ pct-encoded = "%" HEXDIG HEXDIG
+
+ unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+ reserved = gen-delims / sub-delims
+ gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+ sub-delims = "!" / "$" / "&amp;" / "'" / "(" / ")"
+ / "*" / "+" / "," / ";" / "="
+
+Appendix B. Parsing a URI Reference with a Regular Expression
+
+ As the "first-match-wins" algorithm is identical to the "greedy"
+ disambiguation method used by POSIX regular expressions, it is
+ natural and commonplace to use a regular expression for parsing the
+ potential five components of a URI reference.
+
+ The following line is the regular expression for breaking-down a
+ well-formed URI reference into its components.
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 50]</span>
+<a name="page-51" id="page-51" href="#page-51"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+ ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
+  12            3  4          5       6  7        8 9
+
+ The numbers in the second line above are only to assist readability;
+ they indicate the reference points for each subexpression (i.e., each
+ paired parenthesis). We refer to the value matched for subexpression
+ &lt;n&gt; as $&lt;n&gt;. For example, matching the above expression to
+
+ <a href="http://www.ics.uci.edu/pub/ietf/uri/#Related">http://www.ics.uci.edu/pub/ietf/uri/#Related</a>
+
+ results in the following subexpression matches:
+
+ $1 = http:
+ $2 = http
+ $3 = //www.ics.uci.edu
+ $4 = www.ics.uci.edu
+ $5 = /pub/ietf/uri/
+ $6 = &lt;undefined&gt;
+ $7 = &lt;undefined&gt;
+ $8 = #Related
+ $9 = Related
+
+ where &lt;undefined&gt; indicates that the component is not present, as is
+ the case for the query component in the above example. Therefore, we
+ can determine the value of the five components as
+
+ scheme = $2
+ authority = $4
+ path = $5
+ query = $7
+ fragment = $9
+
+ Going in the opposite direction, we can recreate a URI reference from
+ its components by using the algorithm of <a href="#section-5.3">Section 5.3</a>.
+
+Appendix C. Delimiting a URI in Context
+
+ URIs are often transmitted through formats that do not provide a
+ clear context for their interpretation. For example, there are many
+ occasions when a URI is included in plain text; examples include text
+ sent in email, USENET news, and on printed paper. In such cases, it
+ is important to be able to delimit the URI from the rest of the text,
+ and in particular from punctuation marks that might be mistaken for
+ part of the URI.
+
+ In practice, URIs are delimited in a variety of ways, but usually
+ within double-quotes "http://example.com/", angle brackets
+ &lt;http://example.com/&gt;, or just by using whitespace:
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 51]</span>
+<a name="page-52" id="page-52" href="#page-52"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+ http://example.com/
+
+ These wrappers do not form part of the URI.
+
+ In some cases, extra whitespace (spaces, line-breaks, tabs, etc.) may
+ have to be added to break a long URI across lines. The whitespace
+ should be ignored when the URI is extracted.
+
+ No whitespace should be introduced after a hyphen ("-") character.
+ Because some typesetters and printers may (erroneously) introduce a
+ hyphen at the end of line when breaking it, the interpreter of a URI
+ containing a line break immediately after a hyphen should ignore all
+ whitespace around the line break and should be aware that the hyphen
+ may or may not actually be part of the URI.
+
+ Using &lt;&gt; angle brackets around each URI is especially recommended as
+ a delimiting style for a reference that contains embedded whitespace.
+
+ The prefix "URL:" (with or without a trailing space) was formerly
+ recommended as a way to help distinguish a URI from other bracketed
+ designators, though it is not commonly used in practice and is no
+ longer recommended.
+
+ For robustness, software that accepts user-typed URI should attempt
+ to recognize and strip both delimiters and embedded whitespace.
+
+ For example, the text
+
+ Yes, Jim, I found it under "<a href="http://www.w3.org/Addressing/">http://www.w3.org/Addressing/</a>",
+ but you can probably pick it up from &lt;ftp://foo.example.
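+ com/rfc/&gt;.  Note the warning in &lt;<a href="http://www.ics.uci.edu/pub/ietf/uri/historical.html#WARNING">http://www.ics.uci.edu/pub/</a>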
+ <a href="http://www.ics.uci.edu/pub/ietf/uri/historical.html#WARNING">ietf/uri/historical.html#WARNING</a>&gt;.
+
+ contains the URI references
+
+ <a href="http://www.w3.org/Addressing/">http://www.w3.org/Addressing/</a>
+ ftp://foo.example.com/rfc/
+ <a href="http://www.ics.uci.edu/pub/ietf/uri/historical.html#WARNING">http://www.ics.uci.edu/pub/ietf/uri/historical.html#WARNING</a>
+
+
+
+
+
+
+
+
+
+
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 52]</span>
+<a name="page-53" id="page-53" href="#page-53"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+Appendix D. Changes from <a href="http://tools.ietf.org/html/rfc2396">RFC 2396</a>
+
+D.1. Additions
+
+ An ABNF rule for URI has been introduced to correspond to one common
+ usage of the term: an absolute URI with optional fragment.
+
+ IPv6 (and later) literals have been added to the list of possible
+ identifiers for the host portion of an authority component, as
+ described by [<a href="http://tools.ietf.org/html/rfc2732" title="&quot;Format for Literal IPv6 Addresses in URL&#39;s&quot;">RFC2732</a>], with the addition of "[" and "]" to the
+ reserved set and a version flag to anticipate future versions of IP
+ literals. Square brackets are now specified as reserved within the
+ authority component and are not allowed outside their use as
+ delimiters for an IP literal within host. In order to make this
+ change without changing the technical definition of the path, query,
+ and fragment components, those rules were redefined to directly
+ specify the characters allowed.
+
+ As [<a href="http://tools.ietf.org/html/rfc2732" title="&quot;Format for Literal IPv6 Addresses in URL&#39;s&quot;">RFC2732</a>] defers to [<a href="http://tools.ietf.org/html/rfc3513" title="&quot;Internet Protocol Version 6 (IPv6) Addressing Architecture&quot;">RFC3513</a>] for definition of an IPv6 literal
+ address, which, unfortunately, lacks an ABNF description of
+ IPv6address, we created a new ABNF rule for IPv6address that matches
+ the text representations defined by <a href="http://tools.ietf.org/html/rfc3513#section-2.2">Section 2.2</a> of [<a href="http://tools.ietf.org/html/rfc3513" title="&quot;Internet Protocol Version 6 (IPv6) Addressing Architecture&quot;">RFC3513</a>].
+ Likewise, the definition of IPv4address has been improved in order to
+ limit each decimal octet to the range 0-255.
+
+ <a href="#section-6">Section 6</a>, on URI normalization and comparison, has been completely
+ rewritten and extended by using input from Tim Bray and discussion
+ within the W3C Technical Architecture Group.
+
+D.2. Modifications
+
+ The ad-hoc BNF syntax of <a href="http://tools.ietf.org/html/rfc2396">RFC 2396</a> has been replaced with the ABNF of
+ [<a href="http://tools.ietf.org/html/rfc2234" title="&quot;Augmented BNF for Syntax Specifications: ABNF&quot;">RFC2234</a>]. This change required all rule names that formerly
+ included underscore characters to be renamed with a dash instead. In
+ addition, a number of syntax rules have been eliminated or simplified
+ to make the overall grammar more comprehensible. Specifications that
+ refer to the obsolete grammar rules may be understood by replacing
+ those rules according to the following table:
+
+
+
+
+
+
+
+
+
+
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 53]</span>
+<a name="page-54" id="page-54" href="#page-54"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+ +----------------+--------------------------------------------------+
+ | obsolete rule | translation |
+ +----------------+--------------------------------------------------+
+ | absoluteURI | absolute-URI |
+ | relativeURI | relative-part [ "?" query ] |
+ | hier_part | ( "//" authority path-abempty / |
+ | | path-absolute ) [ "?" query ] |
+ | | |
+ | opaque_part | path-rootless [ "?" query ] |
+ | net_path | "//" authority path-abempty |
+ | abs_path | path-absolute |
+ | rel_path | path-rootless |
+ | rel_segment | segment-nz-nc |
+ | reg_name | reg-name |
+ | server | authority |
+ | hostport | host [ ":" port ] |
+ | hostname | reg-name |
+ | path_segments | path-abempty |
+ | param | *&lt;pchar excluding ";"&gt; |
+ | | |
+ | uric | unreserved / pct-encoded / ";" / "?" / ":" |
+ | | / "@" / "&amp;" / "=" / "+" / "$" / "," / "/" |
+ | | |
+ | uric_no_slash | unreserved / pct-encoded / ";" / "?" / ":" |
+ | | / "@" / "&amp;" / "=" / "+" / "$" / "," |
+ | | |
+ | mark | "-" / "_" / "." / "!" / "~" / "*" / "'" |
+ | | / "(" / ")" |
+ | | |
+ | escaped | pct-encoded |
+ | hex | HEXDIG |
+ | alphanum | ALPHA / DIGIT |
+ +----------------+--------------------------------------------------+
+
+ Use of the above obsolete rules for the definition of scheme-specific
+ syntax is deprecated.
+
+ <a href="#section-2">Section 2</a>, on characters, has been rewritten to explain what
+ characters are reserved, when they are reserved, and why they are
+ reserved, even when they are not used as delimiters by the generic
+ syntax. The mark characters that are typically unsafe to decode,
+ including the exclamation mark ("!"), asterisk ("*"), single-quote
+ ("'"), and open and close parentheses ("(" and ")"), have been moved
+ to the reserved set in order to clarify the distinction between
+ reserved and unreserved and, hopefully, to answer the most common
+ question of scheme designers. Likewise, the section on
+ percent-encoded characters has been rewritten, and URI normalizers
+ are now given license to decode any percent-encoded octets
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 54]</span>
+<a name="page-55" id="page-55" href="#page-55"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+ corresponding to unreserved characters. In general, the terms
+ "escaped" and "unescaped" have been replaced with "percent-encoded"
+ and "decoded", respectively, to reduce confusion with other forms of
+ escape mechanisms.
+
+ The ABNF for URI and URI-reference has been redesigned to make them
+ more friendly to LALR parsers and to reduce complexity. As a result,
+ the layout form of syntax description has been removed, along with
+ the uric, uric_no_slash, opaque_part, net_path, abs_path, rel_path,
+ path_segments, rel_segment, and mark rules. All references to
+ "opaque" URIs have been replaced with a better description of how the
+ path component may be opaque to hierarchy. The relativeURI rule has
+ been replaced with relative-ref to avoid unnecessary confusion over
+ whether they are a subset of URI. The ambiguity regarding the
+ parsing of URI-reference as a URI or a relative-ref with a colon in
+ the first segment has been eliminated through the use of five
+ separate path matching rules.
+
+ The fragment identifier has been moved back into the section on
+ generic syntax components and within the URI and relative-ref rules,
+ though it remains excluded from absolute-URI. The number sign ("#")
+ character has been moved back to the reserved set as a result of
+ reintegrating the fragment syntax.
+
+ The ABNF has been corrected to allow the path component to be empty.
+ This also allows an absolute-URI to consist of nothing after the
+ "scheme:", as is present in practice with the "dav:" namespace
+ [<a href="http://tools.ietf.org/html/rfc2518" title="&quot;HTTP Extensions for Distributed Authoring -- WEBDAV&quot;">RFC2518</a>] and with the "about:" scheme used internally by many WWW
+ browser implementations. The ambiguity regarding the boundary
+ between authority and path has been eliminated through the use of
+ five separate path matching rules.
+
+ Registry-based naming authorities that use the generic syntax are now
+ defined within the host rule. This change allows current
+ implementations, where whatever name provided is simply fed to the
+ local name resolution mechanism, to be consistent with the
+ specification. It also removes the need to re-specify DNS name
+ formats here. Furthermore, it allows the host component to contain
+ percent-encoded octets, which is necessary to enable
+ internationalized domain names to be provided in URIs, processed in
+ their native character encodings at the application layers above URI
+ processing, and passed to an IDNA library as a registered name in the
+ UTF-8 character encoding. The server, hostport, hostname,
+ domainlabel, toplabel, and alphanum rules have been removed.
+
+ The resolving relative references algorithm of [<a href="http://tools.ietf.org/html/rfc2396" title="&quot;Uniform Resource Identifiers (URI): Generic Syntax&quot;">RFC2396</a>] has been
+ rewritten with pseudocode for this revision to improve clarity and
+ fix the following issues:
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 55]</span>
+<a name="page-56" id="page-56" href="#page-56"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+ o [<a href="http://tools.ietf.org/html/rfc2396" title="&quot;Uniform Resource Identifiers (URI): Generic Syntax&quot;">RFC2396</a>] <a href="http://tools.ietf.org/html/rfc2396#section-5.2">section 5.2</a>, step 6a, failed to account for a base URI
+ with no path.
+
+ o Restored the behavior of [<a href="http://tools.ietf.org/html/rfc1808" title="&quot;Relative Uniform Resource Locators&quot;">RFC1808</a>] where, if the reference
+ contains an empty path and a defined query component, the target
+ URI inherits the base URI's path component.
+
+ o The determination of whether a URI reference is a same-document
+ reference has been decoupled from the URI parser, simplifying the
+ URI processing interface within applications in a way consistent
+ with the internal architecture of deployed URI processing
+ implementations. The determination is now based on comparison to
+ the base URI after transforming a reference to absolute form,
+ rather than on the format of the reference itself. This change
+ may result in more references being considered "same-document"
+ under this specification than there would be under the rules given
+ in <a href="http://tools.ietf.org/html/rfc2396">RFC 2396</a>, especially when normalization is used to reduce
+ aliases. However, it does not change the status of existing
+ same-document references.
+
+ o Separated the path merge routine into two routines: merge, for
+ describing combination of the base URI path with a relative-path
+ reference, and remove_dot_segments, for describing how to remove
+ the special "." and ".." segments from a composed path. The
+ remove_dot_segments algorithm is now applied to all URI reference
+ paths in order to match common implementations and to improve the
+ normalization of URIs in practice. This change only impacts the
+ parsing of abnormal references and same-scheme references wherein
+ the base URI has a non-hierarchical path.
+
+Index
+
+ A
+ ABNF 11
+ absolute 27
+ absolute-path 26
+ absolute-URI 27
+ access 9
+ authority 17, 18
+
+ B
+ base URI 28
+
+ C
+ character encoding 4
+ character 4
+ characters 8, 11
+ coded character set 4
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 56]</span>
+<a name="page-57" id="page-57" href="#page-57"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+ D
+ dec-octet 20
+ dereference 9
+ dot-segments 23
+
+ F
+ fragment 16, 24
+
+ G
+ gen-delims 13
+ generic syntax 6
+
+ H
+ h16 20
+ hier-part 16
+ hierarchical 10
+ host 18
+
+ I
+ identifier 5
+ IP-literal 19
+ IPv4 20
+ IPv4address 19, 20
+ IPv6 19
+ IPv6address 19, 20
+ IPvFuture 19
+
+ L
+ locator 7
+ ls32 20
+
+ M
+ merge 32
+
+ N
+ name 7
+ network-path 26
+
+ P
+ path 16, 22, 26
+ path-abempty 22
+ path-absolute 22
+ path-empty 22
+ path-noscheme 22
+ path-rootless 22
+ path-abempty 16, 22, 26
+ path-absolute 16, 22, 26
+ path-empty 16, 22, 26
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 57]</span>
+<a name="page-58" id="page-58" href="#page-58"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+ path-rootless 16, 22
+ pchar 23
+ pct-encoded 12
+ percent-encoding 12
+ port 22
+
+ Q
+ query 16, 23
+
+ R
+ reg-name 21
+ registered name 20
+ relative 10, 28
+ relative-path 26
+ relative-ref 26
+ remove_dot_segments 33
+ representation 9
+ reserved 12
+ resolution 9, 28
+ resource 5
+ retrieval 9
+
+ S
+ same-document 27
+ sameness 9
+ scheme 16, 17
+ segment 22, 23
+ segment-nz 23
+ segment-nz-nc 23
+ sub-delims 13
+ suffix 27
+
+ T
+ transcription 8
+
+ U
+ uniform 4
+ unreserved 13
+ URI grammar
+    absolute-URI 27
+    ALPHA 11
+    authority 18
+    CR 11
+    dec-octet 20
+    DIGIT 11
+    DQUOTE 11
+    fragment 24
+    gen-delims 13
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 58]</span>
+<a name="page-59" id="page-59" href="#page-59"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+    h16 20
+    HEXDIG 11
+    hier-part 16
+    host 19
+    IP-literal 19
+    IPv4address 20
+    IPv6address 20
+    IPvFuture 19
+    LF 11
+    ls32 20
+    OCTET 11
+    path 22
+    path-abempty 22
+    path-absolute 22
+    path-empty 22
+    path-noscheme 22
+    path-rootless 22
+    pchar 23
+    pct-encoded 12
+    port 22
+    query 24
+    reg-name 21
+    relative-ref 26
+    reserved 13
+    scheme 17
+    segment 23
+    segment-nz 23
+    segment-nz-nc 23
+    SP 11
+    sub-delims 13
+    unreserved 13
+    URI 16
+    URI-reference 25
+    userinfo 18
+ URI 16
+ URI-reference 25
+ URL 7
+ URN 7
+ userinfo 18
+
+
+
+
+
+
+
+
+
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 59]</span>
+<a name="page-60" id="page-60" href="#page-60"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+Authors' Addresses
+
+ Tim Berners-Lee
+ World Wide Web Consortium
+ Massachusetts Institute of Technology
+ 77 Massachusetts Avenue
+ Cambridge, MA 02139
+ USA
+
+ Phone: +1-617-253-5702
+ Fax: +1-617-258-5999
+ EMail: timbl@w3.org
+ URI: <a href="http://www.w3.org/People/Berners-Lee/">http://www.w3.org/People/Berners-Lee/</a>
+
+
+ Roy T. Fielding
+ Day Software
+ 5251 California Ave., Suite 110
+ Irvine, CA 92617
+ USA
+
+ Phone: +1-949-679-2960
+ Fax: +1-949-679-2972
+ EMail: fielding@gbiv.com
+ URI: <a href="http://roy.gbiv.com/">http://roy.gbiv.com/</a>
+
+
+ Larry Masinter
+ Adobe Systems Incorporated
+ 345 Park Ave
+ San Jose, CA 95110
+ USA
+
+ Phone: +1-408-536-3024
+ EMail: LMM@acm.org
+ URI: <a href="http://larry.masinter.net/">http://larry.masinter.net/</a>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+<span class="grey">Berners-Lee, et al. Standards Track [Page 60]</span>
+<a name="page-61" id="page-61" href="#page-61"><span class="break"> </span></a>
+<span class="grey"><a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a> URI Generic Syntax January 2005</span>
+
+
+Full Copyright Statement
+
+ Copyright (C) The Internet Society (2005).
+
+ This document is subject to the rights, licenses and restrictions
+ contained in <a href="http://tools.ietf.org/html/bcp78">BCP 78</a>, and except as set forth therein, the authors
+ retain all their rights.
+
+ This document and the information contained herein are provided on an
+ "AS IS" basis and THE CONTRIBUTOR, THE ORGANIZATION HE/SHE REPRESENTS
+ OR IS SPONSORED BY (IF ANY), THE INTERNET SOCIETY AND THE INTERNET
+ ENGINEERING TASK FORCE DISCLAIM ALL WARRANTIES, EXPRESS OR IMPLIED,
+ INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE
+ INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED
+ WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+
+Intellectual Property
+
+ The IETF takes no position regarding the validity or scope of any
+ Intellectual Property Rights or other rights that might be claimed to
+ pertain to the implementation or use of the technology described in
+ this document or the extent to which any license under such rights
+ might or might not be available; nor does it represent that it has
+ made any independent effort to identify any such rights. Information
+ on the IETF's procedures with respect to rights in IETF Documents can
+ be found in <a href="http://tools.ietf.org/html/bcp78">BCP 78</a> and <a href="http://tools.ietf.org/html/bcp79">BCP 79</a>.
+
+ Copies of IPR disclosures made to the IETF Secretariat and any
+ assurances of licenses to be made available, or the result of an
+ attempt made to obtain a general license or permission for the use of
+ such proprietary rights by implementers or users of this
+ specification can be obtained from the IETF on-line IPR repository at
+ <a href="http://www.ietf.org/ipr">http://www.ietf.org/ipr</a>.
+
+ The IETF invites any interested party to bring to its attention any
+ copyrights, patents or patent applications, or other proprietary
+ rights that may cover technology that may be required to implement
+ this standard. Please address the information to the IETF at ietf-
+ ipr@ietf.org.
+
+
+Acknowledgement
+
+ Funding for the RFC Editor function is currently provided by the
+ Internet Society.
+
+
+
+
+
+
+Berners-Lee, et al. Standards Track [Page 61]
+<span class="break"> </span>
+
+</pre><br>
+<span class="noprint"><small><small>Html markup produced by rfcmarkup 1.46, available from
+<a href="http://tools.ietf.org/tools/rfcmarkup/">http://tools.ietf.org/tools/rfcmarkup/</a>
+</small></small></span>
+
+</body></html>
\ No newline at end of file
diff --git a/doc/rfc3986_grammar_only.txt b/doc/rfc3986_grammar_only.txt
new file mode 100644
index 0000000..e8b8d08
--- /dev/null
+++ b/doc/rfc3986_grammar_only.txt
@@ -0,0 +1,80 @@
+URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
+
+hier-part = "//" authority path-abempty
+ / path-absolute
+ / path-rootless
+ / path-empty
+
+URI-reference = URI / relative-ref
+
+absolute-URI = scheme ":" hier-part [ "?" query ]
+
+relative-ref = relative-part [ "?" query ] [ "#" fragment ]
+
+relative-part = "//" authority path-abempty
+ / path-absolute
+ / path-noscheme
+ / path-empty
+
+scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+
+authority = [ userinfo "@" ] host [ ":" port ]
+userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
+host = IP-literal / IPv4address / reg-name
+port = *DIGIT
+
+IP-literal = "[" ( IPv6address / IPvFuture ) "]"
+
+IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
+
+IPv6address = 6( h16 ":" ) ls32
+ / "::" 5( h16 ":" ) ls32
+ / [ h16 ] "::" 4( h16 ":" ) ls32
+ / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
+ / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
+ / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
+ / [ *4( h16 ":" ) h16 ] "::" ls32
+ / [ *5( h16 ":" ) h16 ] "::" h16
+ / [ *6( h16 ":" ) h16 ] "::"
+
+h16 = 1*4HEXDIG
+ls32 = ( h16 ":" h16 ) / IPv4address
+IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
+dec-octet = DIGIT ; 0-9
+ / %x31-39 DIGIT ; 10-99
+ / "1" 2DIGIT ; 100-199
+ / "2" %x30-34 DIGIT ; 200-249
+ / "25" %x30-35 ; 250-255
+
+reg-name = *( unreserved / pct-encoded / sub-delims )
+
+path = path-abempty ; begins with "/" or is empty
+ / path-absolute ; begins with "/" but not "//"
+ / path-noscheme ; begins with a non-colon segment
+ / path-rootless ; begins with a segment
+ / path-empty ; zero characters
+
+path-abempty = *( "/" segment )
+path-absolute = "/" [ segment-nz *( "/" segment ) ]
+path-noscheme = segment-nz-nc *( "/" segment )
+path-rootless = segment-nz *( "/" segment )
+path-empty = 0<pchar>
+
+segment = *pchar
+segment-nz = 1*pchar
+segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
+ ; non-zero-length segment without any colon ":"
+
+pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
+
+query = *( pchar / "/" / "?" )
+
+fragment = *( pchar / "/" / "?" )
+
+pct-encoded = "%" HEXDIG HEXDIG
+
+unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+reserved = gen-delims / sub-delims
+gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
+ / "*" / "+" / "," / ";" / "="