From c750418c3f75f3c70c2788f3b37ca4a052ce7c95 Mon Sep 17 00:00:00 2001 From: Ulysse Buonomo Date: Sat, 17 Apr 2021 13:47:11 +0200 Subject: [PATCH 01/14] WIP: Switch to markdown and fix code blocks TODO: 22/23 pages. TODO: maybe choose another syntax color (See README.md) I had a lot of difficulties while reading this great book, and most were related to code blocks being badly formatted. For instance, the `RCLASS(tmp)->iv_tbl` would render as `RCLASS->iv_tbl`. Rather than fixing textile, which really is not the go-to markup language nowadays, I've made another attempt to switch to markdown (seeing that the former wasn't successful). I've done that using the script below, and allowing myself a few changes: 1. add syntax highlighter (See README.md), 2. wrap images in `figure` html tags, and show captions below, 3. make a few small cosmetic changes (See styles.css), 4. remove the old textile related plugin, 5. remove differentiation between small and large code blocks. ```awk BEGIN { # Set it to 1 and remove the first `!` of any rule to only print that rule. DEBUG = 0 figure_count = 0 in_code = 0 } function esc(s) { gsub("%", "%%", s) return s } function print_all_but_first() { for (i=2; i" print "\t\""alt"\"" print "\t
"alt"
" print "" next } !DEBUG && !in_code && /"[^""]+":[^ ]+/ { rv = $0 i = 10 while(match(rv, /"[^""]+":[^ ]+/)) { if (!i--) exit 1 # inifinte loop somehow in preface.textile, change by hand. full_length = RLENGTH match(rv, /"[^""]+":[^ ]/) text = substr(rv, RSTART + 1, RLENGTH - 4) link = substr(rv, RSTART + RLENGTH - 1, full_length - RLENGTH + 1) if (match(link, /^[a-z]+\.html$/)) link = substr(link, 0, length(link) - length(".html")) sub(/"[^""]+":[^ ]+/, "["text"]("link")TODO-checklink", rv) } print rv next } !DEBUG { print } # default (thanks to `next` in every action) ``` Signed-off-by: Ulysse Buonomo --- .gitignore | 3 +- Gemfile | 2 +- Gemfile.lock | 90 +++++++++++------- README.md | 18 ++++ _config.yml | 10 +- _layouts/default.html | 1 + _plugins/rhg_textile_converter.rb | 59 ------------ css/highlight.css | 82 ++++++++++++++++ css/styles.css | 25 ++++- variable.textile => variable.md | 149 ++++++++++++++++-------------- 10 files changed, 272 insertions(+), 167 deletions(-) delete mode 100644 _plugins/rhg_textile_converter.rb create mode 100644 css/highlight.css rename variable.textile => variable.md (93%) diff --git a/.gitignore b/.gitignore index ad9af3a..f1f7f13 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ -_site +/_site +/.jekyll-cache ebooks *# *~ diff --git a/Gemfile b/Gemfile index 5366c07..afe1e9f 100644 --- a/Gemfile +++ b/Gemfile @@ -1,3 +1,3 @@ source "https://rubygems.org" gem "jekyll" -gem "RedCloth" +gem "rouge" diff --git a/Gemfile.lock b/Gemfile.lock index 1d8ae3b..288474e 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,40 +1,68 @@ GEM remote: https://rubygems.org/ specs: - RedCloth (4.2.9) - classifier (1.3.3) - fast-stemmer (>= 1.0.0) - colorator (0.1) - commander (4.1.3) - highline (~> 1.6.11) - directory_watcher (1.4.1) - fast-stemmer (1.0.2) - highline (1.6.19) - jekyll (1.0.3) - classifier (~> 1.3) - colorator (~> 0.1) - commander (~> 4.1.3) - directory_watcher (~> 1.4.1) - kramdown (~> 1.0.2) - liquid (~> 2.3) - maruku (~> 
0.5) - pygments.rb (~> 0.5.0) - safe_yaml (~> 0.7.0) - kramdown (1.0.2) - liquid (2.5.0) - maruku (0.6.1) - syntax (>= 1.0.0) - posix-spawn (0.3.6) - pygments.rb (0.5.2) - posix-spawn (~> 0.3.6) - yajl-ruby (~> 1.1.0) - safe_yaml (0.7.1) - syntax (1.0.0) - yajl-ruby (1.1.0) + addressable (2.7.0) + public_suffix (>= 2.0.2, < 5.0) + colorator (1.1.0) + concurrent-ruby (1.1.8) + em-websocket (0.5.2) + eventmachine (>= 0.12.9) + http_parser.rb (~> 0.6.0) + eventmachine (1.2.7) + ffi (1.15.0) + forwardable-extended (2.6.0) + http_parser.rb (0.6.0) + i18n (1.8.10) + concurrent-ruby (~> 1.0) + jekyll (4.2.0) + addressable (~> 2.4) + colorator (~> 1.0) + em-websocket (~> 0.5) + i18n (~> 1.0) + jekyll-sass-converter (~> 2.0) + jekyll-watch (~> 2.0) + kramdown (~> 2.3) + kramdown-parser-gfm (~> 1.0) + liquid (~> 4.0) + mercenary (~> 0.4.0) + pathutil (~> 0.9) + rouge (~> 3.0) + safe_yaml (~> 1.0) + terminal-table (~> 2.0) + jekyll-sass-converter (2.1.0) + sassc (> 2.0.1, < 3.0) + jekyll-watch (2.2.1) + listen (~> 3.0) + kramdown (2.3.1) + rexml + kramdown-parser-gfm (1.1.0) + kramdown (~> 2.0) + liquid (4.0.3) + listen (3.5.1) + rb-fsevent (~> 0.10, >= 0.10.3) + rb-inotify (~> 0.9, >= 0.9.10) + mercenary (0.4.0) + pathutil (0.16.2) + forwardable-extended (~> 2.6) + public_suffix (4.0.6) + rb-fsevent (0.10.4) + rb-inotify (0.10.1) + ffi (~> 1.0) + rexml (3.2.5) + rouge (3.26.0) + safe_yaml (1.0.5) + sassc (2.4.0) + ffi (~> 1.9) + terminal-table (2.0.0) + unicode-display_width (~> 1.1, >= 1.1.1) + unicode-display_width (1.7.0) PLATFORMS ruby DEPENDENCIES - RedCloth jekyll + rouge + +BUNDLED WITH + 2.2.11 diff --git a/README.md b/README.md index abe867e..a90715d 100644 --- a/README.md +++ b/README.md @@ -37,6 +37,24 @@ $ jekyll serve # this compiles files and starts a server on localhost:4000. [Jekyll usage](https://github.com/mojombo/jekyll/wiki/usage) +
+ +Changing code style + +```zsh +function apply_style { + pygmentize -S $1 -f html -a .highlight > css/highlight.css +} +while read -r style; do + echo $style + apply_style $style + read -qs && echo $style >> t.preselection +done < <(pygmentize -L styles | awk -F'[ :]' '/^\* / { print $2 }') +``` + +
+ + Reading in EPUB ========= diff --git a/_config.yml b/_config.yml index 972afec..38c9741 100644 --- a/_config.yml +++ b/_config.yml @@ -1,3 +1,9 @@ -redcloth: - hard_breaks: false exclude: ["ebooks", "script", "original_repo", "Gemfile", "Gemfile.lock", "dictionary.txt", "README.md"] +markdown: kramdown +highlighter: rouge +pygments: true +kramdown: + syntax_highlighter: rouge + input: GFM + auto_ids: true + toc_levels: 1..3 diff --git a/_layouts/default.html b/_layouts/default.html index f28bf83..e2c1fe9 100644 --- a/_layouts/default.html +++ b/_layouts/default.html @@ -4,6 +4,7 @@ {% if page.title %} {{ page.title }} | {% endif %} Ruby Hacking Guide + diff --git a/_plugins/rhg_textile_converter.rb b/_plugins/rhg_textile_converter.rb deleted file mode 100644 index c2e423f..0000000 --- a/_plugins/rhg_textile_converter.rb +++ /dev/null @@ -1,59 +0,0 @@ -require "jekyll/converters/textile" -module Jekyll::Converters - class RhgTextile < Textile - safe true - - # set this :low before "jekyll serve" when you want to use only Jekyll::Converters::Textile - priority :high - - RHG_CODE_RE = /`([^`]+)`/ - RHG_IMAGE_RE = /^!(.+\.(?:jpg|png))\((.+)\)!/ - - def convert(content) - # try to enable the syntax of the original RubyForge project, - # but not fully. - lines = content.lines - skips = [] - skips << "cvs diff parse.y" # chapter 11 - skips << "69 EXPR_DOT, /*" # chapter 11 - content = lines.map do |line| - unless skips.any? {|s| line.include? s } - line = line.gsub(RHG_CODE_RE) { "#{$1}" } - end - - # this applies the markup of the original book and - # fixes improper markups of the generated htmls at the same time. - if line =~ /^▼ / - line = %{

#{line.rstrip}

\n} - end - - line - end.join - - # try to apply the style for images of the original book - figc = 0 - no_figc = content.include? %{class="image"} - content.gsub!(RHG_IMAGE_RE) do |m| - figc += 1 - src, title = $~[1..2] - alt = "(" + src.split(".").first.split("_").last + ")" - title = "Figure #{figc}: #{title}" unless no_figc - out = <<-EOS -

-#{alt}
-#{title} -

- EOS - end - - super content - end - - unless Jekyll::Converter.respond_to? :descendants - # simulate Jekyll::Converter.inherited - Jekyll::Converter.subclasses << self - Jekyll::Converter.subclasses.sort! - end - end -end - diff --git a/css/highlight.css b/css/highlight.css new file mode 100644 index 0000000..1b008fb --- /dev/null +++ b/css/highlight.css @@ -0,0 +1,82 @@ +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +.highlight .hll { background-color: #ffffcc } +.highlight { background: #f8f8f8; } +.highlight .c { color: #8f5902; font-style: italic } /* Comment */ +.highlight .err { color: #a40000; border: 1px solid #ef2929 } /* Error */ +.highlight .g { color: #000000 } /* Generic */ +.highlight .k { color: #204a87; font-weight: bold } /* Keyword */ +.highlight .l { color: #000000 } /* Literal */ +.highlight .n { color: #000000 } /* Name */ +.highlight .o { color: #ce5c00; font-weight: bold } /* Operator */ +.highlight .x { color: #000000 } /* Other */ +.highlight .p { color: #000000; font-weight: bold } /* Punctuation */ +.highlight .ch { color: #8f5902; font-style: italic } /* Comment.Hashbang */ +.highlight .cm { color: #8f5902; font-style: italic } /* Comment.Multiline */ +.highlight .cp { color: #8f5902; font-style: italic } /* Comment.Preproc */ +.highlight .cpf { color: #8f5902; font-style: italic } /* Comment.PreprocFile */ +.highlight .c1 { color: #8f5902; font-style: italic } /* Comment.Single */ +.highlight .cs { color: #8f5902; font-style: italic } /* Comment.Special */ +.highlight .gd { color: #a40000 } /* Generic.Deleted */ +.highlight .ge { 
color: #000000; font-style: italic } /* Generic.Emph */ +.highlight .gr { color: #ef2929 } /* Generic.Error */ +.highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +.highlight .gi { color: #00A000 } /* Generic.Inserted */ +.highlight .go { color: #000000; font-style: italic } /* Generic.Output */ +.highlight .gp { color: #8f5902 } /* Generic.Prompt */ +.highlight .gs { color: #000000; font-weight: bold } /* Generic.Strong */ +.highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +.highlight .gt { color: #a40000; font-weight: bold } /* Generic.Traceback */ +.highlight .kc { color: #204a87; font-weight: bold } /* Keyword.Constant */ +.highlight .kd { color: #204a87; font-weight: bold } /* Keyword.Declaration */ +.highlight .kn { color: #204a87; font-weight: bold } /* Keyword.Namespace */ +.highlight .kp { color: #204a87; font-weight: bold } /* Keyword.Pseudo */ +.highlight .kr { color: #204a87; font-weight: bold } /* Keyword.Reserved */ +.highlight .kt { color: #204a87; font-weight: bold } /* Keyword.Type */ +.highlight .ld { color: #000000 } /* Literal.Date */ +.highlight .m { color: #0000cf; font-weight: bold } /* Literal.Number */ +.highlight .s { color: #4e9a06 } /* Literal.String */ +.highlight .na { color: #c4a000 } /* Name.Attribute */ +.highlight .nb { color: #204a87 } /* Name.Builtin */ +.highlight .nc { color: #000000 } /* Name.Class */ +.highlight .no { color: #000000 } /* Name.Constant */ +.highlight .nd { color: #5c35cc; font-weight: bold } /* Name.Decorator */ +.highlight .ni { color: #ce5c00 } /* Name.Entity */ +.highlight .ne { color: #cc0000; font-weight: bold } /* Name.Exception */ +.highlight .nf { color: #000000 } /* Name.Function */ +.highlight .nl { color: #f57900 } /* Name.Label */ +.highlight .nn { color: #000000 } /* Name.Namespace */ +.highlight .nx { color: #000000 } /* Name.Other */ +.highlight .py { color: #000000 } /* Name.Property */ +.highlight .nt { color: #204a87; font-weight: bold } /* 
Name.Tag */ +.highlight .nv { color: #000000 } /* Name.Variable */ +.highlight .ow { color: #204a87; font-weight: bold } /* Operator.Word */ +.highlight .w { color: #f8f8f8; text-decoration: underline } /* Text.Whitespace */ +.highlight .mb { color: #0000cf; font-weight: bold } /* Literal.Number.Bin */ +.highlight .mf { color: #0000cf; font-weight: bold } /* Literal.Number.Float */ +.highlight .mh { color: #0000cf; font-weight: bold } /* Literal.Number.Hex */ +.highlight .mi { color: #0000cf; font-weight: bold } /* Literal.Number.Integer */ +.highlight .mo { color: #0000cf; font-weight: bold } /* Literal.Number.Oct */ +.highlight .sa { color: #4e9a06 } /* Literal.String.Affix */ +.highlight .sb { color: #4e9a06 } /* Literal.String.Backtick */ +.highlight .sc { color: #4e9a06 } /* Literal.String.Char */ +.highlight .dl { color: #4e9a06 } /* Literal.String.Delimiter */ +.highlight .sd { color: #8f5902; font-style: italic } /* Literal.String.Doc */ +.highlight .s2 { color: #4e9a06 } /* Literal.String.Double */ +.highlight .se { color: #4e9a06 } /* Literal.String.Escape */ +.highlight .sh { color: #4e9a06 } /* Literal.String.Heredoc */ +.highlight .si { color: #4e9a06 } /* Literal.String.Interpol */ +.highlight .sx { color: #4e9a06 } /* Literal.String.Other */ +.highlight .sr { color: #4e9a06 } /* Literal.String.Regex */ +.highlight .s1 { color: #4e9a06 } /* Literal.String.Single */ +.highlight .ss { color: #4e9a06 } /* Literal.String.Symbol */ +.highlight .bp { color: #3465a4 } /* Name.Builtin.Pseudo */ +.highlight .fm { color: #000000 } /* Name.Function.Magic */ +.highlight .vc { color: #000000 } /* Name.Variable.Class */ +.highlight .vg { color: #000000 } /* Name.Variable.Global */ +.highlight .vi { color: #000000 } /* Name.Variable.Instance */ +.highlight .vm { color: #000000 } /* Name.Variable.Magic */ +.highlight .il { color: #0000cf; font-weight: bold } /* Literal.Number.Integer.Long */ diff --git a/css/styles.css b/css/styles.css index e4af49b..ef54ecc 100644 
--- a/css/styles.css +++ b/css/styles.css @@ -5,7 +5,7 @@ body { line-height: 140%; margin-top: 5%; margin-left: 2em; - width: 40em; + width: 35em; } h1 { @@ -72,13 +72,16 @@ dd { } p.caption { - margin-botton: 0px; + margin-bottom: 0px; } -pre { - line-height: 120%; +.highlight { padding: 8px; - background: #eee; + border-radius: 8px; +} + +.highlight pre { + margin: 0; } span.ami { @@ -121,6 +124,18 @@ body { font-family: Georgia, serif; } +figcaption { + font-size: 0.8em; + font-style: italic; +} + +/* code inlined in text */ +code.highlighter-rouge { + background: #33a2; + border-radius: 2px; + padding: 2px; +} + h1 code, h2 code, h3 code, h4 code, h5 code{ font-family: Menlo, Monaco, "Andale Mono", "Lucida console", "courier New", monospace; font-weight: normal; diff --git a/variable.textile b/variable.md similarity index 93% rename from variable.textile rename to variable.md index 957e3a5..b2c118e 100644 --- a/variable.textile +++ b/variable.md @@ -5,11 +5,13 @@ title: Variables and constants Translated by Vincent ISAMBART -h1. Chapter 6: Variables and constants +Chapter 6: Variables and constants +---------------------------------- -h2. Outline of this chapter +Outline of this chapter +======================= -h3. Ruby variables +### Ruby variables In Ruby there are quite a lot of different types of variables and constants. Let's line them up, starting from the largest scope. @@ -29,28 +31,28 @@ this chapter we'll talk about: We will talk about local variables in the third part of the book. -h3. API for variables +### API for variables The object of this chapter's analysis is `variable.c`. Let me first introduce the APIs which would be the entry points. -
+```c
 VALUE rb_iv_get(VALUE obj, char *name)
 VALUE rb_ivar_get(VALUE obj, ID name)
 VALUE rb_iv_set(VALUE obj, char *name, VALUE val)
 VALUE rb_ivar_set(VALUE obj, ID name, VALUE val)
-
+``` These are the APIs to access instance variables which have already been described. They are shown here again because their definitions are in `variable.c`. -
+```c
 VALUE rb_cv_get(VALUE klass, char *name)
 VALUE rb_cvar_get(VALUE klass, ID name)
 VALUE rb_cv_set(VALUE klass, char *name, VALUE val)
 VALUE rb_cvar_set(VALUE klass, ID name, VALUE val)
-
+``` These functions are the API for accessing class variables. Class variables belong directly to classes so the functions take a class as @@ -60,30 +62,30 @@ variable "name". The ones with a shorter name are generally easier to use because they take a `char*`. The ones with a longer name are more for internal use as they take a `ID`. -
+```c
 VALUE rb_const_get(VALUE klass, ID name)
 VALUE rb_const_get_at(VALUE klass, ID name)
 VALUE rb_const_set(VALUE klass, ID name, VALUE val)
-
+``` These functions are for accessing constants. Constants also belong to classes so they take classes as parameter. `rb_const_get()` follows the superclass chain, whereas `rb_const_get_at()` does not (it just looks in `klass`). -
+```c
 struct global_entry *rb_global_entry(ID name)
 VALUE rb_gv_get(char *name)
 VALUE rb_gvar_get(struct global_entry *ent)
 VALUE rb_gv_set(char *name, VALUE val)
 VALUE rb_gvar_set(struct global_entry *ent, VALUE val)
-
+``` These last functions are for accessing global variables. They are a little different from the others due to the use of `struct global_entry`. We'll explain this while describing the implementation. -h3. Points of this chapter +### Points of this chapter The most important point when talking about variables is "Where and how are variables stored?", in other words: data structures. @@ -96,7 +98,8 @@ should think by comparing the implementation with the specification, like "It behaves like this in this situation so its implementation couldn't be other then this!" -h2. Class variables +Class variables +=============== Class variables are variables that belong to classes. In Java or C++ they are called static variables. They can be accessed from both the @@ -105,7 +108,7 @@ information only available in the evaluator, and we do not have one for the moment. So from the C level it's like having no access range. We'll just focus on the way these variables are stored. -h3. Reading +### Reading The functions to get a class variable are `rb_cvar_get()` and `rb_cv_get()`. The function with the longer name takes `ID` as @@ -113,7 +116,7 @@ parameter and the one with the shorter one takes `char*`. Because the one taking an `ID` seems closer to the internals, we'll look at it. ▼ `rb_cvar_get()` -
+```c
 1508  VALUE
 1509  rb_cvar_get(klass, id)
 1510      VALUE klass;
@@ -141,7 +144,7 @@ one taking an `ID` seems closer to the internals, we'll look at it.
 1532  }
 
 (variable.c)
-
+``` This function reads a class variable in `klass`. @@ -164,7 +167,8 @@ will return different `ID`s for "`@var`" and "`@@var`". At the Ruby level, the variable type is determined only by the prefix so there's no way to access a class variable called `@var` from Ruby. -h2. Constants +Constants +========= It's a little abrupt but I'd like you to remember the members of `struct RClass`. If we exclude the `basic` member, `struct RClass` @@ -176,19 +180,19 @@ contains: Then, considering that: -# constants belong to a class -# we can't see any table dedicated to constants in `struct RClass` -# class variables and instance variables are both in `iv_tbl` +* constants belong to a class +* we can't see any table dedicated to constants in `struct RClass` +* class variables and instance variables are both in `iv_tbl` Could it mean that the constants are also... -h3. Assignment +### Assignment `rb_const_set()` is a function to set the value of constants: it sets the constant `id` in the class `klass` to the value `val`. ▼ `rb_const_set()` -
+```c
 1377  void
 1378  rb_const_set(klass, id, val)
 1379      VALUE klass;
@@ -199,12 +203,12 @@ the constant `id` in the class `klass` to the value `val`.
 1384  }
 
 (variable.c)
-
+``` `mod_av_set()` does all the hard work: ▼ `mod_av_set()` -
+```c
 1352  static void
 1353  mod_av_set(klass, id, val, isconst)
 1354      VALUE klass;
@@ -231,39 +235,39 @@ the constant `id` in the class `klass` to the value `val`.
 1375  }
 
 (variable.c)
-
+``` You can this time again ignore the warning checks (`rb_raise()`, `rb_error_frozen()` and `rb_warn()`). Here's what's left: ▼ `mod_av_set()` (only the important part) -
+```c
     if (!RCLASS(klass)->iv_tbl) {
         RCLASS(klass)->iv_tbl = st_init_numtable();
     }
     st_insert(RCLASS(klass)->iv_tbl, id, val);
-
+``` We're now sure constants also reside in the instance table. It means in the `iv_tbl` of `struct RClass`, the following are mixed together: -# the class's own instance variables -# class variables -# constants +* the class's own instance variables +* class variables +* constants -h3. Reading +### Reading We now know how the constants are stored. We'll now check how they really work. -h4. `rb_const_get()` +#### `rb_const_get()` We'll now look at `rb_const_get()`, the function to read a constant. This function returns the constant referred to by `id` from the class `klass`. ▼ `rb_const_get()` -
+```c
 1156  VALUE
 1157  rb_const_get(klass, id)
 1158      VALUE klass;
@@ -302,7 +306,7 @@ constant. This function returns the constant referred to by `id` from the class
 1189  }
 
 (variable.c)
-
+``` There's a lot of code in the way. First, we should at least remove the `rb_name_error()` in the second half. In the middle, what's around @@ -310,7 +314,7 @@ There's a lot of code in the way. First, we should at least remove the remove that for the time being. The function gets reduced to this: ▼ `rb_const_get` (simplified) -
+```c
 VALUE
 rb_const_get(klass, id)
     VALUE klass;
@@ -327,20 +331,20 @@ rb_const_get(klass, id)
         tmp = RCLASS(tmp)->super;
     }
 }
-
+``` Now it should be pretty easy to understand. The function searches for the constant in `iv_tbl` while climbing `klass`'s superclass chain. That means: -
+```ruby
 class A
   Const = "ok"
 end
 class B < A
   p(Const)    # can be accessed
 end
-
+``` The only problem remaining is `top_const_get()`. This function is only called for `rb_cObject` so `top` must mean "top-level". If you don't @@ -348,26 +352,26 @@ remember, at the top-level, the class is `Object`. This means the same as "in the class statement defining `C`, the class becomes `C`", meaning that "the top-level's class is `Object`". -
-# the class of the top-level is Object
+```ruby
+# the class of the top-level is Object
 class A
   # the class is A
   class B
     # the class is B
   end
 end
-
+``` So `top_const_get()` probably does something specific to the top level. -h4. `top_const_get()` +#### `top_const_get()` Let's look at this `top_const_get` function. It looks up the `id` constant writes the value in `klassp` and returns. ▼ `top_const_get()` -
+```c
 1102  static int
 1103  top_const_get(id, klassp)
 1104      ID id;
@@ -386,7 +390,7 @@ constant writes the value in `klassp` and returns.
 1117  }
 
 (variable.c)
-
+``` `rb_class_tbl` was already mentioned in chapter 4 "Classes and modules". It's the table for storing the classes defined at the @@ -399,9 +403,9 @@ It is designed to be able to register a library that is loaded automatically when accessing a particular top-level constant for the first time. This can be used like this: -
+```ruby
 autoload(:VeryBigClass, "verybigclass")   # VeryBigClass is defined in it
-
+``` After this, when `VeryBigClass` is accessed for the first time, the `verybigclass` library is loaded (with `require`). As long as @@ -416,7 +420,7 @@ change in how it works soon. 1.8: autoloaded constants do not need to be defined at top-level anymore). -h4. Other classes? +#### Other classes? But where did the code for looking up constants in other classes end up? After all, constants are first looked up in the outside classes, then @@ -430,9 +434,10 @@ evaluator is handled. Specifically, this search in other classes is done in the `ev_const_get()` function of `eval.c`. We'll look at it and finish with the constants in the third part of the book. -h2. Global variables +Global variables +================ -h3. General remarks +### General remarks Global variables can be accessed from anywhere. Or put the other way around, there is no need to restrict access to them. Because they are @@ -450,11 +455,11 @@ the following are only available for global variables: Let's explain this simply. -h4. Aliases of variables +#### Aliases of variables -
+```ruby
 alias $newname $oldname
-
+``` After this, you can use `$newname` instead of `$oldname`. `alias` for variables is mainly a counter-measure for "symbol variables". "symbol @@ -470,7 +475,7 @@ That said, currently symbol variables are not recommended, and are moved one by one in singleton methods of suitable modules. The current school of thought is that `$=` and others will be abolished in 2.0. -h4. Hooks +#### Hooks You can "hook" read and write of global variables. @@ -483,13 +488,13 @@ Essentially only special strings like `"EUC"` or `"UTF8"` can be assigned to it, but this is too bothersome so it is designed so that `"e"` or `"u"` can also be used. -
+```ruby
 p($KCODE)      # "NONE" (default)
 $KCODE = "e"
 p($KCODE)      # "EUC"
 $KCODE = "u"
 p($KCODE)      # "UTF8"
-
+``` Knowing that you can hook assignment of global variables, you should understand easily how this can be done. By the way, `$KCODE`'s K comes @@ -502,14 +507,14 @@ functions, and I'd like to use more pages for the analysis of the parser and evaluator. That's why I'll proceed with the explanation below whose degree of half-hearted is 85%. -h3. Data structure +### Data structure I said that the point when looking at how variables work is the way they are stored. First, I'd like you to firmly grasp the structure used by global variables. ▼ Data structure for global variables -
+```c
   21  static st_table *rb_global_tbl;
 
  334  struct global_entry {
@@ -528,14 +533,18 @@ structure used by global variables.
  332  };
 
 (variable.c)
-
+``` `rb_global_tbl` is the main table. All global variables are stored in this table. The keys of this table are of course variable names (`ID`). A value is expressed by a `struct global_entry` and a `struct global_variable` (figure 1). -!images/ch_variable_gvar.png(Global variables table at execution time)! +
+ figure 1: Global variables table at execution time +
figure 1: Global variables table at execution time
+
+ The structure representing the variables is split in two to be able to create `alias`es. When an `alias` is established, two `global_entry`s @@ -554,14 +563,14 @@ When hooks are set at the Ruby level, a list of `struct trace_var`s is stored in the `trace` member of `struct global_variable`, but I won't talk about it, and omit `struct trace_var`. -h3. Reading +### Reading You can have a general understanding of global variables just by looking at how they are read. The functions for reading them are `rb_gv_get()` and `rb_gvar_get()`. ▼ `rb_gv_get() rb_gvar_get()` -
+```c
  716  VALUE
  717  rb_gv_get(name)
  718      const char *name;
@@ -581,7 +590,7 @@ they are read. The functions for reading them are `rb_gv_get()` and
  655  }
 
 (variable.c)
-
+``` A substantial part of the content seems to turn around the `rb_global_entry()` function, but that does not prevent us @@ -594,7 +603,7 @@ function pointer `var->getter`. If `p` is a function pointer, But the main part is still `rb_global_entry()`. ▼ `rb_global_entry()` -
+```c
  351  struct global_entry*
  352  rb_global_entry(id)
  353      ID id;
@@ -621,7 +630,7 @@ But the main part is still `rb_global_entry()`.
  374  }
 
 (variable.c)
-
+``` The main treatment is only done by the `st_lookup()` at the beginning. What's done afterwards is just creating (and storing) a new entry. As, when @@ -642,7 +651,7 @@ undefined global variables can be read. `undef_setter()` is a little bit interesting so let's look at it. ▼ `undef_setter()` -
+```c
  385  static void
  386  undef_setter(val, id, data, var)
  387      VALUE val;
@@ -658,11 +667,15 @@ interesting so let's look at it.
  397  }
 
 (variable.c)
-
+``` `val_getter()` takes the value from `entry->data` and returns it. `val_setter()` just puts a value in `entry->data`. Setting handlers this way allows us not to need special handling for undefined variables (figure 2). Skillfully done, isn't it? -!images/ch_variable_gaccess.png(Setting and consultation of global variables)! +
+ figure 2: Setting and consultation of global variables +
figure 2: Setting and consultation of global variables
+
From dfd40588a54d46f20880f8253007a9819e763c05 Mon Sep 17 00:00:00 2001 From: Ulysse Buonomo Date: Sat, 17 Apr 2021 14:33:17 +0200 Subject: [PATCH 02/14] replace `@` with backticks in fin.textile --- fin.textile | 54 ++++++++++++++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/fin.textile b/fin.textile index 3777148..af65b38 100644 --- a/fin.textile +++ b/fin.textile @@ -6,11 +6,11 @@ h1. Final Chapter: Ruby's future h2. Issues to be addressed -@ruby@ isn't 'completely finished' software. It's still being developed, +`ruby` isn't 'completely finished' software. It's still being developed, there are still a lot of issues. Firstly, we want to try removing inherent problems in the current interpreter. -The order of the topics is mostly in the same order as the chapters of +The order of the topics is mostly in the same order as the chapters of this book. @@ -43,41 +43,41 @@ there might be the necessity to consider Incremental GC. h3. Implementation of parser -As we saw in Part 2, the implementation of @ruby@ parser has already utilized -@yacc@'s ability to almost its limit, thus I can't think it can endure further +As we saw in Part 2, the implementation of `ruby` parser has already utilized +`yacc`'s ability to almost its limit, thus I can't think it can endure further expansions. It's all right if there's nothing planned to expand, but a big name "keyword argument" is planned next and it's sad if we could not express another demanded grammar because of the -limitation of @yacc@. +limitation of `yacc`. h3. Reuse of parser -Ruby's parser is very complex. In particular, dealing with around @lex_state@ +Ruby's parser is very complex. In particular, dealing with around `lex_state` seriously is very hard. Due to this, embedding a Ruby program or creating a program to deal with a Ruby program itself is quite difficult. 
-For example, I'm developing a tool named @racc@, -which is prefixed with R because it is a Ruby-version @yacc@. -With @racc@, the syntax of grammar files are almost the same as @yacc@ +For example, I'm developing a tool named `racc`, +which is prefixed with R because it is a Ruby-version `yacc`. +With `racc`, the syntax of grammar files are almost the same as `yacc` but we can write actions in Ruby. To do so, it could not determine the end of an action without parsing Ruby code properly, but "properly" is very difficult. Since there's no other choice, currently I've compromised at the level that it can parse "almost all". As another example which requires analyzing Ruby program, -I can enumerate some tools like @indent@ and @lint@, +I can enumerate some tools like `indent` and `lint`, but creating such tool also requires a lot efforts. It would be desperate if it is something complex like a refactoring tool. Then, what can we do? If we can't recreate the same thing, -what if @ruby@'s original parser can be used as a component? +what if `ruby`'s original parser can be used as a component? In other words, making the parser itself a library. This is a feature we want by all means. -However, what becomes problem here is, as long as @yacc@ is used, +However, what becomes problem here is, as long as `yacc` is used, we cannot make parser reentrant. -It means, say, we cannot call @yyparse()@ recursively, +It means, say, we cannot call `yyparse()` recursively, and we cannot call it from multiple threads. Therefore, it should be implemented in the way of not returning control to Ruby while parsing. @@ -86,14 +86,14 @@ while parsing. h3. Hiding Code -With current @ruby@, it does not work without the source code of the program to +With current `ruby`, it does not work without the source code of the program to run. Thus, people who don't want others to read their source code might have trouble. h3. 
Interpretor Object -Currently each process cannot have multiple @ruby@ interpretors, +Currently each process cannot have multiple `ruby` interpretors, this was discussed in Chapter 13. If having multiple interpretors is practically possible, it seems better, but is it possible to implement such thing? @@ -101,18 +101,18 @@ but is it possible to implement such thing? h3. The structure of evaluator -Current @eval.c@ is, above all, too complex. +Current `eval.c` is, above all, too complex. Embedding Ruby's stack frames to machine stack could occasionally become the -source of trouble, using @setjmp() longjmp()@ aggressively makes it less easy to +source of trouble, using `setjmp() longjmp()` aggressively makes it less easy to understand and slows down its speed. -Particularly with RISC machine, which has many registers, using @setjmp()@ -aggressively can easily cause slowing down because @setjmp()@ set aside all +Particularly with RISC machine, which has many registers, using `setjmp()` +aggressively can easily cause slowing down because `setjmp()` set aside all things in registers. h3. The performance of evaluator -@ruby@ is already enough fast for ordinary use. +`ruby` is already enough fast for ordinary use. But aside from it, regarding a language processor, definitely the faster is the better. To achieve better performance, in other words to optimize, @@ -136,17 +136,17 @@ So I profiled. This is a profile when running some application but this is approximately the profile of a general Ruby program. -@rb_eval()@ appeared in the overwhelming percentage being at the top, +`rb_eval()` appeared in the overwhelming percentage being at the top, after that, in addition to functions of GC, evaluator core, functions that are specific to the program are mixed. For example, in the case of this application, -it takes a lot of time for regular expression match (@ruby_re_match@). +it takes a lot of time for regular expression match (`ruby_re_match`). 
However, even if we understood this, the question is how to improve it. -To think simply, it can be archived by making @rb_eval()@ faster. -That said, but as for @ruby@ core, there are almost not any room which can be -easily optimized. For instance, apparently "tail recursive -> @goto@ conversion" -used in the place of @NODE_IF@ and others has already applied almost all +To think simply, it can be archived by making `rb_eval()` faster. +That said, but as for `ruby` core, there are almost not any room which can be +easily optimized. For instance, apparently "tail recursive -> `goto` conversion" +used in the place of `NODE_IF` and others has already applied almost all possible places it can be applied. In other words, without changing the way of thinking fundamentally, there's no room to improve. @@ -156,7 +156,7 @@ h3. The implementation of thread This was also discussed in Chapter 19. There are really a lot of issues about the implementation of the current ruby's thread. Particularly, it cannot mix -with native threads so badly. The two great advantages of @ruby@'s thread, +with native threads so badly. The two great advantages of `ruby`'s thread, (1) high portability (2) the same behavior everywhere, are definitely incomparable, but probably that implementation is something we cannot continue to use eternally, isn't it? 
From 95ee728564156a04770c92e36e9f0de0a15646a5 Mon Sep 17 00:00:00 2001 From: Ulysse Buonomo Date: Sat, 17 Apr 2021 15:24:39 +0200 Subject: [PATCH 03/14] make style links relative to work if not top level --- _layouts/default.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/_layouts/default.html b/_layouts/default.html index e2c1fe9..e17dcb7 100644 --- a/_layouts/default.html +++ b/_layouts/default.html @@ -3,8 +3,8 @@ {% if page.title %} {{ page.title }} | {% endif %} Ruby Hacking Guide - - + + From 3a145e810c8f3966fa0a5a7912b09719ed4a3c0b Mon Sep 17 00:00:00 2001 From: Ulysse Buonomo Date: Sat, 17 Apr 2021 16:07:41 +0200 Subject: [PATCH 04/14] Rename all files (no check yet) ```bash for f in *.textile; do if awk -f t.textile2md.awk $f > ${f%.textile}.md; then rm $f else echo "error with $f" fi done ``` --- anyeval.textile => anyeval.md | 116 ++--- class.textile => class.md | 382 ++++++++------- contextual.textile => contextual.md | 513 ++++++++++---------- evaluator.textile => evaluator.md | 209 +++++---- fin.textile => fin.md | 68 +-- gc.textile => gc.md | 342 ++++++++------ index.md | 86 ++++ index.textile | 84 ---- intro.textile => intro.md | 335 ++++++------- iterator.textile => iterator.md | 210 +++++---- load.textile => load.md | 178 +++---- method.textile => method.md | 140 +++--- minimum.textile => minimum.md | 471 ++++++++++--------- module.textile => module.md | 270 +++++------ name.textile => name.md | 162 ++++--- object.textile => object.md | 263 ++++++----- parser.textile => parser.md | 441 ++++++++++-------- preface.textile => preface.md | 30 +- security.textile => security.md | 41 +- spec.textile => spec.md | 698 ++++++++++++++-------------- syntree.textile => syntree.md | 322 ++++++------- thread.textile => thread.md | 185 ++++---- yacc.textile => yacc.md | 294 ++++++------ 23 files changed, 3094 insertions(+), 2746 deletions(-) rename anyeval.textile => anyeval.md (96%) rename class.textile => class.md (92%) rename 
contextual.textile => contextual.md (94%) rename evaluator.textile => evaluator.md (96%) rename fin.textile => fin.md (96%) rename gc.textile => gc.md (94%) create mode 100644 index.md delete mode 100644 index.textile rename intro.textile => intro.md (92%) rename iterator.textile => iterator.md (93%) rename load.textile => load.md (95%) rename method.textile => method.md (96%) rename minimum.textile => minimum.md (88%) rename module.textile => module.md (96%) rename name.textile => name.md (92%) rename object.textile => object.md (93%) rename parser.textile => parser.md (94%) rename preface.textile => preface.md (95%) rename security.textile => security.md (95%) rename spec.textile => spec.md (91%) rename syntree.textile => syntree.md (95%) rename thread.textile => thread.md (96%) rename yacc.textile => yacc.md (94%) diff --git a/anyeval.textile b/anyeval.md similarity index 96% rename from anyeval.textile rename to anyeval.md index a3d06f5..2868ae0 100644 --- a/anyeval.textile +++ b/anyeval.md @@ -2,9 +2,11 @@ layout: default title: "Chapter 17: Dynamic evaluation" --- -h1. Chapter 17: Dynamic evaluation +Chapter 17: Dynamic evaluation +------------------------------ -h2. Overview +Overview +======== I have already finished to describe about the mechanism of the evaluator by the previous chapter. @@ -14,7 +16,7 @@ There are three targets: `eval`, `Module#module_eval` and `Object#instance_eval`. -h3. `eval` +### `eval` I've already described about `eval`, @@ -26,20 +28,20 @@ Its return value is the value of the last expression of the program. -
+```TODO-lang
 p eval("1 + 1")   # 2
-
+``` You can also refer to a variable in its scope from inside of a string to `eval`. -
+```TODO-lang
 lvar = 5
 @ivar = 6
 p eval("lvar + @ivar")   # 11
-
+``` Readers who have been reading until here cannot simply read and pass over the @@ -52,14 +54,14 @@ And you can also define methods and define classes. -
+```TODO-lang
 def a
   eval('class C;  def test() puts("ok") end   end')
 end
 
 a()          # define class C and C#test
 C.new.test   # shows ok
-
+``` Moreover, as mentioned a little in the previous chapter, @@ -68,20 +70,20 @@ its environment. -
+```TODO-lang
 def new_env
   n = 5
   Proc.new { nil }   # turn the environment of this method into an object and return it
 end
 
 p eval('n * 3', new_env())   # 15
-
+``` -h3. `module_eval` and `instance_eval` +### `module_eval` and `instance_eval` When a `Proc` is passed as the second argument of `eval`, the evaluations can be @@ -91,7 +93,7 @@ is as if in a module statement or a class statement. -
+```TODO-lang
 lvar = "toplevel lvar"   # a local variable to confirm this scope
 
 module M
@@ -103,7 +105,7 @@ M.module_eval(<<'EOS')   # a suitable situation to use here-document
       puts 'ok'
     end
 EOS
-
+``` With `instance_eval`, you can evaluate in an environment whose `self` of the @@ -111,7 +113,7 @@ singleton class statement is the object. -
+```TODO-lang
 lvar = "toplevel lvar"   # a local variable to confirm this scope
 
 obj = Object.new
@@ -122,7 +124,7 @@ obj.instance_eval(<<'EOS')
       puts 'ok'
     end
 EOS
-
+``` Additionally, these `module_eval` and `instance_eval` can also be used as @@ -131,13 +133,13 @@ For instance, -
+```TODO-lang
 obj = Object.new
 p obj                 # #<Object:0x...>
 obj.instance_eval {
     p self            # #<Object:0x...> (the same object)
 }
-
+``` Like this. @@ -155,10 +157,11 @@ compiled when loading files. -h2. `eval` +`eval` +====== -h3. `eval()` +### `eval()` The `eval` of Ruby branches many times based on the presence and absence of the @@ -166,9 +169,9 @@ parameters. Let's assume the form of call is limited to the below: -
+```TODO-lang
 eval(prog_string, some_block)
-
+``` Then, since this makes the actual interface function `rb_f_eval()` almost @@ -177,10 +180,10 @@ The function prototype of `eval()` is: -
+```TODO-lang
 static VALUE
 eval(VALUE self, VALUE src, VALUE scope, char *file, int line);
-
+``` `scope` is the `Proc` of the second parameter. @@ -190,7 +193,7 @@ is supposed to be located. Then, let's see the content:

▼ `eval()` (simplified)

-
+```TODO-lang
 4984  static VALUE
 4985  eval(self, src, scope, file, line)
 4986      VALUE self, src, scope;
@@ -281,7 +284,7 @@ is supposed to be located. Then, let's see the content:
 5127  }
 
 (eval.c)
-
+``` If this function is shown without any preamble, you probably feel "oww!". @@ -307,7 +310,7 @@ Here is `compile()`.

▼ `compile()`

-
+```TODO-lang
 4968  static NODE*
 4969  compile(src, file, line)
 4970      VALUE src;
@@ -325,7 +328,7 @@ Here is `compile()`.
 4982  }
 
 (eval.c)
-
+``` `ruby_nerrs` is the variable incremented in `yyerror()`. @@ -346,7 +349,7 @@ Let's go back to `parse.y` again and complete this investigation. -h3. `top_local` +### `top_local` I've mentioned that the functions named `local_push() local_pop()` are used @@ -359,11 +362,11 @@ They are called in this sort of way.

▼ How `top_local_init()` is called

-
+```TODO-lang
 program :   { top_local_init(); }
           compstmt
             { top_local_setup(); }
-
+``` Of course, in actuality various other things are also done, @@ -373,7 +376,7 @@ And this is the content of it:

▼ `top_local_init()`

-
+```TODO-lang
 5273  static void
 5274  top_local_init()
 5275  {
@@ -393,7 +396,7 @@ And this is the content of it:
 5289  }
 
 (parse.y)
-
+``` This means that `local_tbl` is copied from `ruby_scope` to `lvtbl`. @@ -404,7 +407,7 @@ Next, here is `top_local_setup()`.

▼ `top_local_setup()`

-
+```TODO-lang
 5291  static void
 5292  top_local_setup()
 5293  {
@@ -447,7 +450,7 @@ Next, here is `top_local_setup()`.
 5329  }
 
 (parse.y)
-
+``` Since `local_vars` can be either in the stack or in the heap, it makes the code @@ -458,7 +461,7 @@ it is forced to change its allocation method to `malloc`. -h3. Block Local Variable +### Block Local Variable By the way, how about block local variables? @@ -468,7 +471,7 @@ it is `yycompile()`.

▼ setting `ruby_dyna_vars` aside

-
+```TODO-lang
 static NODE*
 yycompile(f, line)
 {
@@ -478,7 +481,7 @@ yycompile(f, line)
          :
     ruby_dyna_vars = vars;
 }
-
+``` This looks like a mere save-restore, but the point is that this does not clear @@ -515,7 +518,7 @@ I'd like the readers who noticed this to be relieved by reading the next part.

▼ `yycompile()` − freeing `ruby_dyna_vars`

-
+```TODO-lang
 2386      vp = ruby_dyna_vars;
 2387      ruby_dyna_vars = vars;
 2388      lex_strterm = 0;
@@ -526,7 +529,7 @@ I'd like the readers who noticed this to be relieved by reading the next part.
 2393      }
 
 (parse.y)
-
+``` It is designed so that the loop would stop @@ -536,10 +539,11 @@ when it reaches the link created at the evaluator (`vars`). -h2. `instance_eval` +`instance_eval` +=============== -h3. The Whole Picture +### The Whole Picture The substance of `Module#module_eval` is `rb_mod_module_eval()`, @@ -548,7 +552,7 @@ and the substance of `Object#instance_eval` is `rb_obj_instance_eval()`.

▼ `rb_mod_module_eval() rb_obj_instance_eval()`

-
+```TODO-lang
 5316  VALUE
 5317  rb_mod_module_eval(argc, argv, mod)
 5318      int argc;
@@ -577,7 +581,7 @@ and the substance of `Object#instance_eval` is `rb_obj_instance_eval()`.
 5314  }
 
 (eval.c)
-
+``` These two methods have a common part as "a method to replace `self` with `class`", @@ -603,8 +607,8 @@ But for those who reading, one have to simultaneously face at 2 times 2 = 4 ways it is not a good plan. Therefore, here we assume only the case when -#1 it is an `instance_eval` -#2 which takes a string as its argument +* it is an `instance_eval` +* which takes a string as its argument . And extracting all functions under `rb_obj_instance_eval()` in-line, @@ -612,7 +616,7 @@ folding constants, we'll read the result. -h3. After Absorbed +### After Absorbed After all, @@ -622,7 +626,7 @@ it becomes very comprehensible in comparison to the one before being absorbed.

▼ `specific_eval()` − `instance_eval`, `eval`, string

-
+```TODO-lang
 static VALUE
 instance_eval_string(self, src, file, line)
     VALUE self, src;
@@ -666,7 +670,7 @@ instance_eval_string(self, src, file, line)
 
     return result;
 }
-
+``` It seems that this pushes the singleton class of the object to `CLASS` and @@ -678,7 +682,7 @@ missing, but this also does not make much difference. -h3. Before being absorbed +### Before being absorbed Though the author said it becomes more friendly to read, @@ -693,7 +697,7 @@ Here is the result of cutting them all.

▼ `specific_eval()` (simplified)

-
+```TODO-lang
 5258  static VALUE
 5259  specific_eval(argc, argv, klass, self)
 5260      int argc;
@@ -711,7 +715,7 @@ Here is the result of cutting them all.
 5296  }
 
 (eval.c)
-
+``` As you can see, this branches perfectly in two ways based on whether there's @@ -731,7 +735,7 @@ Next, we'll look at `eval_under()` and `eval_under_i()`.

▼ `eval_under()`

-
+```TODO-lang
 5222  static VALUE
 5223  eval_under(under, self, src, file, line)
 5224      VALUE under, self, src;
@@ -761,7 +765,7 @@ Next, we'll look at `eval_under()` and `eval_under_i()`.
 5219  }
 
 (eval.c)
-
+``` In this function, in order to make its arguments single, @@ -793,10 +797,10 @@ Also in the previous absorbed version, for only this point, -
+```TODO-lang
 VALUE sclass = .....;
 VALUE cbase = sclass;
-
+``` I thought that I would write this way, diff --git a/class.textile b/class.md similarity index 92% rename from class.textile rename to class.md index 848f7b3..04e77e0 100644 --- a/class.textile +++ b/class.md @@ -5,12 +5,14 @@ title: Classes and modules Translated by Vincent ISAMBART -h1. Chapter 4: Classes and modules +Chapter 4: Classes and modules +------------------------------ In this chapter, we'll see the details of the data structures created by classes and modules. -h2. Classes and methods definition +Classes and methods definition +============================== First, I'd like to have a look at how Ruby classes are defined at the C level. This chapter investigates almost only particular @@ -30,13 +32,13 @@ There are a few other versions of these functions, but the extension libraries and even most of the core library is defined using just this API. I'll introduce to you these functions one by one. -h3. Class definition +### Class definition `rb_define_class()` defines a class at the top-level. Let's take the Ruby array class, `Array`, as an example. ▼ `Array` class definition -
+```TODO-lang
   19  VALUE rb_cArray;
 
 1809  void
@@ -45,7 +47,7 @@ Ruby array class, `Array`, as an example.
 1812      rb_cArray  = rb_define_class("Array", rb_cObject);
 
 (array.c)
-
+``` `rb_cObject` and `rb_cArray` correspond respectively to `Object` and `Array` at the Ruby level. The added prefix `rb` shows that it belongs @@ -58,22 +60,22 @@ the class object, it also defines the constant. That means that after this you can already access `Array` from a Ruby program. It corresponds to the following Ruby program: -
+```TODO-lang
 class Array < Object
-
+``` I'd like you to note the fact that there is no `end`. It was written like this on purpose. It is because with `rb_define_class()` the body of the class has not been executed. -h3. Nested class definition +### Nested class definition After that, there's `rb_define_class_under()`. This function defines a class nested in an other class or module. This time the example is what is returned by `stat(2)`, `File::Stat`. ▼ Definition of `File::Stat` -
+```TODO-lang
   78  VALUE rb_cFile;
   80  static VALUE rb_cStat;
 
@@ -81,52 +83,52 @@ what is returned by `stat(2)`, `File::Stat`.
 2674      rb_cStat = rb_define_class_under(rb_cFile, "Stat", rb_cObject);
 
 (file.c)
-
+``` This code corresponds to the following Ruby program; -
+```TODO-lang
 class File < IO
   class Stat < Object
-
+``` This time again I omitted the `end` on purpose. -h3. Module definition +### Module definition `rb_define_module()` is simple so let's end this quickly. ▼ Definition of `Enumerable` -
+```TODO-lang
   17  VALUE rb_mEnumerable;
 
  492      rb_mEnumerable = rb_define_module("Enumerable");
 
 (enum.c)
-
+``` The `m` in the beginning of `rb_mEnumerable` is similar to the `c` for classes: it shows that it is a module. The corresponding Ruby program is: -
+```TODO-lang
 module Enumerable
-
+``` `rb_define_module_under()` is not used much so we'll skip it. -h3. Method definition +### Method definition This time the function is the one for defining methods, `rb_define_method()`. It's used very often. We'll take once again an example from `Array`. ▼ Definition of `Array#to_s` -
+```TODO-lang
 1818  rb_define_method(rb_cArray, "to_s", rb_ary_to_s, 0);
 
 (array.c)
-
+``` With this the `to_s` method is defined in `Array`. The method body is given by a function pointer (`rb_ary_to_s`). The fourth parameter is @@ -134,13 +136,13 @@ the number of parameters taken by the method. As `to_s` does not take any parameters, it's 0. If we write the corresponding Ruby program, we'll have this: -
+```TODO-lang
 class Array < Object
   def to_s
     # content of rb_ary_to_s()
   end
 end
-
+``` Of course the `class` part is not included in `rb_define_method()` and only the `def` part is accurate. But if there is no `class` part, it @@ -150,26 +152,26 @@ the enclosing `class` part. One more example, this time taking a parameter: ▼ Definition of `Array#concat` -
+```TODO-lang
 1835  rb_define_method(rb_cArray, "concat", rb_ary_concat, 1);
 
 (array.c)
-
+``` The class for the definition is `rb_cArray` (`Array`), the method name is `concat`, its body is `rb_ary_concat()` and the number of parameters is 1. It corresponds to writing the corresponding Ruby program: -
+```TODO-lang
 class Array < Object
   def concat( str )
     # content of rb_ary_concat()
   end
 end
-
+``` -h3. Singleton methods definition +### Singleton methods definition We can define methods that are specific to a single object instance. They are called singleton methods. As I used `File.unlink` as @@ -178,17 +180,17 @@ show it here, but for a particular reason we'll look at `File.link` instead. ▼ Definition of `File.link` -
+```TODO-lang
 2624  rb_define_singleton_method(rb_cFile, "link", rb_file_s_link, 2);
 
 (file.c)
-
+``` It's used like `rb_define_method()`. The only difference is that here the first parameter is just the "object" where the method is defined. In this case, it's defined in `rb_cFile`. -h3. Entry point +### Entry point Being able to make definitions like before is great, but where are these functions called from, and by what means are they executed? @@ -197,7 +199,7 @@ instance, for `Array` a function `Init_Array()` like this has been made: ▼ `Init_Array` -
+```TODO-lang
 1809  void
 1810  Init_Array()
 1811  {
@@ -215,13 +217,13 @@ made:
 1822      rb_define_method(rb_cArray, "frozen?",  rb_ary_frozen_p, 0);
 
 (array.c)
-
+``` The `Init` for the built-in functions are explicitly called during the startup of `ruby`. This is done in `inits.c`. ▼ `rb_call_inits()` -
+```TODO-lang
   47  void
   48  rb_call_inits()
   49  {
@@ -240,7 +242,7 @@ the startup of `ruby`. This is done in `inits.c`.
   62      Init_Array();
 
 (inits.c)
-
+``` This way, `Init_Array()` is called properly. @@ -248,9 +250,9 @@ That explains it for the built-in libraries, but what about extension libraries? In fact, for extension libraries the convention is the same. Take the following code: -
+```TODO-lang
 require "myextension"
-
+``` With this, if the loaded extension library is `myextension.so`, at load time, the (`extern`) function named `Init_myextension()` is @@ -262,7 +264,7 @@ The following example is from `stringio`, an extension library provided with `ruby`, that is to say not from a built-in library. ▼ `Init_stringio()` (beginning) -
+```TODO-lang
  895  void
  896  Init_stringio()
  897  {
@@ -276,11 +278,12 @@ provided with `ruby`, that is to say not from a built-in library.
  904      rb_define_method(StringIO, "reopen", strio_reopen, -1);
 
 (ext/stringio/stringio.c)
-
+``` -h2. Singleton classes +Singleton classes +================= -h3. `rb_define_singleton_method()` +### `rb_define_singleton_method()` You should now be able to more or less understand how normal methods are defined. Somehow making the body of the method, then registering it @@ -288,7 +291,7 @@ in `m_tbl` will do. But what about singleton methods? We'll now look into the way singleton methods are defined. ▼ `rb_define_singleton_method()` -
+```TODO-lang
  721  void
  722  rb_define_singleton_method(obj, name, func, argc)
  723      VALUE obj;
@@ -300,7 +303,7 @@ into the way singleton methods are defined.
  729  }
 
 (class.c)
-
+``` As I explained, `rb_define_method()` is a function used to define normal methods, so the difference from normal methods is only @@ -314,13 +317,13 @@ classes are even more on the implementation side. In the Ruby language way, they are not formally included, and don't appear much at the Ruby level. -h3. `rb_singleton_class()` +### `rb_singleton_class()` Well, let's confirm what the singleton classes are made of. It's too simple to just show you the code of a function each time so this time I'll use a new weapon, a call graph. -
+```TODO-lang
 rb_define_singleton_method
     rb_define_method
     rb_singleton_class
@@ -328,7 +331,7 @@ rb_define_singleton_method
         rb_make_metaclass
             rb_class_boot
             rb_singleton_class_attached
-
+``` Call graphs are graphs showing calling relationships among functions (or more generally procedures). The call graphs showing all the calls @@ -363,7 +366,7 @@ We should look out for the following two points: * What exactly are singleton classes? * What is the purpose of singleton classes? -h3. Normal classes and singleton classes +### Normal classes and singleton classes Singleton classes are special classes: they're basically the same as normal classes, but there are a few differences. We can say that @@ -379,7 +382,7 @@ of `rb_define_class()` itself. I have some reasons to be interested in something that's deeper. That's why we will first look at the call graph of `rb_define_class()`. -
+```TODO-lang
 rb_define_class
     rb_class_inherited
     rb_define_class_id
@@ -388,13 +391,13 @@ rb_define_class
         rb_make_metaclass
             rb_class_boot
             rb_singleton_class_attached
-
+``` I'm interested in `rb_class_new()`. Doesn't this name mean it creates a new class? Let's confirm that. ▼ `rb_class_new()` -
+```TODO-lang
   37  VALUE
   38  rb_class_new(super)
   39      VALUE super;
@@ -410,14 +413,14 @@ a new class? Let's confirm that.
   49  }
 
 (class.c)
-
+``` `Check_Type()` checks the type of object structure, so we can ignore it. `rb_raise()` is error handling so we can ignore it. Only `rb_class_boot()` remains. So let's look at it. ▼ `rb_class_boot()` -
+```TODO-lang
   21  VALUE
   22  rb_class_boot(super)
   23      VALUE super;
@@ -435,7 +438,7 @@ ignore it. `rb_raise()` is error handling so we can ignore it. Only
   35  }
 
 (class.c)
-
+``` `NEWOBJ()` and `OBJSETUP()` are fixed expressions used when creating Ruby objects that possess one of the built-in structure types (`struct Rxxxx`). @@ -456,13 +459,13 @@ and `rb_class_new()` is almost identical. Then, let's once more look at `rb_singleton_class()`'s call graph: -
+```TODO-lang
 rb_singleton_class
     SPECIAL_SINGLETON
     rb_make_metaclass
         rb_class_boot
         rb_singleton_class_attached
-
+``` Here also `rb_class_boot()` is called. So up to that point, it's the same as in normal classes. What's going on after is what's different @@ -471,13 +474,13 @@ characteristics of singleton classes. If everything's clear so far, we just need to read `rb_singleton_class()` and `rb_make_metaclass()`. -h3. Compressed `rb_singleton_class()` +### Compressed `rb_singleton_class()` `rb_singleton_class()` is a little long so we'll first remove its non-essential parts. ▼ `rb_singleton_class()` -
+```TODO-lang
  678  #define SPECIAL_SINGLETON(x,c) do {\
  679      if (obj == (x)) {\
  680          return c;\
@@ -522,7 +525,7 @@ non-essential parts.
  719  }
 
 (class.c)
-
+``` The first and the second half are separated by a blank line. The first half handles special cases and the second half handles the general @@ -544,10 +547,10 @@ related to signals. Because they are defined in `rubysig.h`, you can guess that `INTS` is the abbreviation of interrupts. You can ignore them. -h3. Compressed `rb_make_metaclass()` +### Compressed `rb_make_metaclass()` ▼ `rb_make_metaclass()` -
+```TODO-lang
  142  VALUE
  143  rb_make_metaclass(obj, super)
  144      VALUE obj, super;
@@ -568,7 +571,7 @@ h3. Compressed `rb_make_metaclass()`
  158  }
 
 (class.c)
-
+``` We already saw `rb_class_boot()`. It creates a (normal) class using the `super` parameter as its superclass. After that, the @@ -576,14 +579,14 @@ the `super` parameter as its superclass. After that, the name of the function makes us think that it is the indication of a singleton class. -h3. What are singleton classes? +### What are singleton classes? Finishing the above process, furthermore, we'll throw away the declarations because parameters, return values and local variables are all `VALUE`. That makes us able to compress to the following: ▼ `rb_singleton_class() rb_make_metaclass()` (after compression) -
+```TODO-lang
 rb_singleton_class(obj)
 {
     if (FL_TEST(RBASIC(obj)->klass, FL_SINGLETON) &&
@@ -613,7 +616,7 @@ rb_make_metaclass(obj, super)
 
     return klass;
 }
-
+``` The condition of the `if` statement of `rb_singleton_class()` seems quite complicated. However, this condition is not connected to @@ -629,7 +632,7 @@ we'll remove it. With these simplifications, we get the following: ▼ `rb_singleton_class() rb_make_metaclass()` (after recompression) -
+```TODO-lang
 rb_singleton_class(obj)
 {
     klass = create a class with RBASIC(obj)->klass as superclass;
@@ -637,14 +640,14 @@ rb_singleton_class(obj)
     RBASIC(obj)->klass = klass;
     return klass;
 }
-
+``` But there is still a quite hard to understand side to it. That's because `klass` is used too often. So let's rename the `klass` variable to `sclass`. ▼ `rb_singleton_class() rb_make_metaclass()` (variable substitution) -
+```TODO-lang
 rb_singleton_class(obj)
 {
     sclass = create a class with RBASIC(obj)->klass as superclass;
@@ -652,14 +655,17 @@ rb_singleton_class(obj)
     RBASIC(obj)->klass = sclass;
     return sclass;
 }
-
+``` Now it should be very easy to understand. To make it even simpler, I've represented what is done with a diagram (figure 1). In the horizontal direction is the "instance - class" relation, and in the vertical direction is inheritance (the superclasses are above). -!images/ch_class_addsclass.png(`rb_singleton_class`)! +
+ figure 1: `rb_singleton_class` +
figure 1: `rb_singleton_class`
+
When comparing the first and last part of this diagram, you can understand that `sclass` is inserted without changing the @@ -668,25 +674,25 @@ the inheritance is increased one step. By defining methods there, we can define methods which have completely nothing to do with other instances of `klass`. -h3. Singleton classes and instances +### Singleton classes and instances By the way, did you notice about, during the compression process, the call to `rb_singleton_class_attached()` was stealthily removed? Here: -
+```TODO-lang
 rb_make_metaclass(obj, super)
 {
     klass = create a class with super as superclass;
     FL_SET(klass, FL_SINGLETON);
     RBASIC(obj)->klass = klass;
     rb_singleton_class_attached(klass, obj);   /* THIS */
-
+``` Let's have a look at what it does. ▼ `rb_singleton_class_attached()` -
+```c
  130  void
  131  rb_singleton_class_attached(klass, obj)
  132      VALUE klass, obj;
@@ -701,7 +707,7 @@ Let's have a look at what it does.
  140  }
 
 (class.c)
-
+``` If the `FL_SINGLETON` flag of `klass` is set... in other words if it's a singleton class, put the `__attached__` → `obj` relation in the @@ -741,7 +747,7 @@ Hence, each singleton class has only one instance ... or rather, it must be limited to one. -h3. Summary +### Summary We've done a lot, maybe made a real mayhem, so let's finish and put everything in order with a summary. @@ -752,16 +758,20 @@ What are singleton classes? They are classes that have the What are singleton methods? They are methods defined in the singleton class of an object. -h2. Metaclasses +Metaclasses +=========== -h3. Inheritance of singleton methods +### Inheritance of singleton methods -h4. Infinite chain of classes +#### Infinite chain of classes Even a class has a class, and it's `Class`. And the class of `Class` is again `Class`. We find ourselves in an infinite loop (figure 2). -!images/ch_class_infloop.png(Infinite loop of classes)! +
+ figure 2: Infinite loop of classes +
figure 2: Infinite loop of classes
+
Up to here it's something we've already gone through. What's going after that is the theme of this chapter. Why do classes have to make a @@ -792,7 +802,7 @@ I'm repeating myself, but the fact that `Class`'s class is `Class` is only to make the implementation easier, there's nothing important in this logic. -h4. "Class is also an object" +#### "Class is also an object" "Everything is an object" is often used as advertising statement when speaking about Ruby. And as a part of that, "Classes are also objects!" @@ -832,15 +842,18 @@ And to implement static methods, another thing was necessary: singleton methods. By chain reaction, that also makes singleton classes necessary. Figure 3 shows these dependency relationships. -!images/ch_class_reqlink.png(Requirements dependencies)! +
+ figure 3: Requirements dependencies +
figure 3: Requirements dependencies
+
-h4. Class methods inheritance +#### Class methods inheritance In Ruby, singleton methods defined in a class are called class methods. However, their specification is a little strange. For some reasons, class methods are inheritable. -
+```ruby
 class A
   def A.test    # defines a singleton method in A
     puts("ok")
@@ -851,14 +864,14 @@ class B < A
 end
 
 B.test()  # calls it
-
+``` This can't occur with singleton methods from objects that are not classes. In other words, classes are the only ones handled specially. In the following section we'll see how class methods are inherited. -h3. Singleton class of a class +### Singleton class of a class Assuming that class methods are inherited, where is this operation done? It must be done either at class definition (creation) or at singleton @@ -868,7 +881,7 @@ Then let's first look at the code defining classes. Class definition means of course `rb_define_class()`. Now let's take the call graph of this function. -
+```TODO-lang
 rb_define_class
     rb_class_inherited
     rb_define_class_id
@@ -877,7 +890,7 @@ rb_define_class
         rb_make_metaclass
             rb_class_boot
             rb_singleton_class_attached
-
+``` If you're wondering where you've seen it before, we looked at it in the previous section. At that time you did not see it but if you look @@ -888,12 +901,12 @@ Furthermore, why is the lower level `rb_make_metaclass()` used instead of `rb_singleton_class()`? It looks like we have to check these surroundings again. -h4. `rb_define_class_id()` +#### `rb_define_class_id()` Let's first start our reading with its caller, `rb_define_class_id()`. ▼ `rb_define_class_id()` -
+```c
  160  VALUE
  161  rb_define_class_id(id, super)
  162      ID id;
@@ -910,7 +923,7 @@ Let's first start our reading with its caller, `rb_define_class_id()`.
  173  }
 
 (class.c)
-
+``` `rb_class_new()` was a function that creates a class with `super` as its superclass. `rb_name_class()`'s name means it names a class, but @@ -919,24 +932,24 @@ that there's the `rb_make_metaclass()` in question. I'm concerned by the fact that when called from `rb_singleton_class()`, the parameters were different. Last time was like this: -
+```c
 rb_make_metaclass(obj, RBASIC(obj)->klass);
-
+``` But this time is like this: -
+```c
 rb_make_metaclass(klass, RBASIC(super)->klass);
-
+``` So as you can see it's slightly different. How do the results change depending on that? Let's have once again a look at a simplified `rb_make_metaclass()`. -h4. `rb_make_metaclass` (once more) +#### `rb_make_metaclass` (once more) ▼ `rb_make_metaclass` (after first compression) -
+```TODO-lang
 rb_make_metaclass(obj, super)
 {
     klass = create a class with super as superclass;
@@ -953,21 +966,21 @@ rb_make_metaclass(obj, super)
 
     return klass;
 }
-
+``` Last time, the `if` statement was wholly skipped, but looking once again, something is done only for `T_CLASS`, in other words classes. This clearly looks important. In `rb_define_class_id()`, as it's called like this: -
+```c
 rb_make_metaclass(klass, RBASIC(super)->klass);
-
+``` Let's expand `rb_make_metaclass()`'s parameter variables with the actual values. ▼ `rb_make_metaclass` (recompression) -
+```TODO-lang
 rb_make_metaclass(klass, super_klass /* == RBASIC(super)->klass */)
 {
     sclass = create a class with super_class as superclass;
@@ -975,7 +988,7 @@ rb_make_metaclass(klass, super_klass /* == RBASIC(super)->klass */)
     RBASIC(sclass)->klass = sclass;
     return sclass;
 }
-
+``` Doing this as a diagram gives something like figure 4. In it, the names between parentheses are singleton classes. This notation is @@ -984,14 +997,20 @@ that `obj`'s singleton class is written as `(obj)`. And `(klass)` is the singleton class for `klass`. It looks like the singleton class is caught between a class and this class's superclass's class. -!images/ch_class_metaclass.png(Introduction of a class's singleton class)! +
+ figure 4: Introduction of a class's singleton class +
figure 4: Introduction of a class's singleton class
+
By expanding our imagination further from this result, we can think that the superclass's class (the `c` in figure 4) must again be a singleton class. You'll understand with one more inheritance level (figure 5). -!images/ch_class_multi.png(Hierarchy of multi-level inheritance)! +
+ figure 5: Hierarchy of multi-level inheritance +
figure 5: Hierarchy of multi-level inheritance
+
As the relationship between `super` and `klass` is the same as the one between `klass` and `klass2`, `c` must be the singleton class @@ -1000,21 +1019,24 @@ conclusion that `Object`'s class must be `(Object)`. And that's the case in practice. For example, by inheriting like in the following program : -
+```ruby
 class A < Object
 end
 class B < A
 end
-
+``` internally, a structure like figure 6 is created. -!images/ch_class_metatree.png(Class hierarchy and metaclasses)! +
+ figure 6: Class hierarchy and metaclasses +
figure 6: Class hierarchy and metaclasses
+
As classes and their metaclasses are linked and inherit like this, class methods are inherited. -h3. Class of a class of a class +### Class of a class of a class You've understood the working of class methods inheritance, but by doing that, in the opposite some questions have appeared. What is the @@ -1023,7 +1045,10 @@ For this, we can check it by using debuggers. I've made figure 7 from the results of this investigation. -!images/ch_class_mmm.png(Class of a class's singleton class)! +
+ figure 7: Class of a class's singleton class +
figure 7: Class of a class's singleton class
+
A class's singleton class puts itself as its own class. Quite complicated. @@ -1032,9 +1057,9 @@ The second question: the class of `Object` must be `Class`. Didn't I properly confirm this in chapter 1: Ruby language minimum by using `class()` method? -
+```ruby
 p(Object.class())   # Class
-
+``` Certainly, that's the case "at the Ruby level". But "at the C level", it's the singleton class `(Object)`. If `(Object)` does not appear at @@ -1043,7 +1068,7 @@ classes. Let's look at the body of the method, `rb_obj_class()` to confirm that. ▼ `rb_obj_class()` -
+```c
   86  VALUE
   87  rb_obj_class(obj)
   88      VALUE obj;
@@ -1062,7 +1087,7 @@ confirm that.
   84  }
 
 (object.c)
-
+``` `CLASS_OF(obj)` returns the `basic.klass` of `obj`. While in `rb_class_real()`, all singleton classes are skipped (advancing @@ -1073,9 +1098,12 @@ chain (figure 8). `I_CLASS` will appear later when we will talk about include. -!images/ch_class_real.png(Singleton class and real class)! +
+ figure 8: Singleton class and real class +
figure 8: Singleton class and real class
+
-h3. Singleton class and metaclass +### Singleton class and metaclass Well, the singleton classes that were introduced in classes is also one type of class, it's a class's class. So it can be called @@ -1096,7 +1124,7 @@ Then finally, even if you understood that some classes are metaclasses, it's not as if there was any concrete gain. I'd like you not to care too much about it. -h3. Bootstrap +### Bootstrap We have nearly finished our talk about classes and metaclasses. But there is still one problem left. It's about the 3 metaobjects @@ -1109,7 +1137,7 @@ in `ruby`, only these 3 classes's creation is handled specially. Then let's look at the code: ▼ `Object`, `Module` and `Class` creation -
+```c
 1243  rb_cObject = boot_defclass("Object", 0);
 1244  rb_cModule = boot_defclass("Module", rb_cObject);
 1245  rb_cClass =  boot_defclass("Class",  rb_cModule);
@@ -1119,7 +1147,7 @@ Then let's look at the code:
 1249  metaclass = rb_make_metaclass(rb_cClass, metaclass);
 
 (object.c)
-
+``` First, in the first half, `boot_defclass()` is similar to `rb_class_boot()`, it just creates a class with its given superclass @@ -1131,27 +1159,34 @@ And in the three lines of the second half, `(Object)`, `(Module)` and `rb_make_metaclass()` so there is no problem. With this, the metaobjects' bootstrap is finished. -!images/ch_class_boot1.png(Metaobjects creation)! +
+ figure 9: Metaobjects creation +
figure 9: Metaobjects creation
+
After taking everything into account, it gives us the final shape like figure 10. -!images/ch_class_metaobj.png(Ruby metaobjects)! +
+ figure 10: Ruby metaobjects +
figure 10: Ruby metaobjects
+
-h2. Class names +Class names +=========== In this section, we will analyse how's formed the reciprocal conversion between class and class names, in other words constants. Concretely, we will target `rb_define_class()` and `rb_define_class_under()`. -h3. Name → class +### Name → class First we'll read `rb_defined_class()`. After the end of this function, the class can be found from the constant. ▼ `rb_define_class()` -
+```c
  183  VALUE
  184  rb_define_class(name, super)
  185      const char *name;
@@ -1185,7 +1220,7 @@ the class can be found from the constant.
  213  }
 
 (class.c)
-
+``` This can be clearly divided into the two parts: before and after `rb_define_class_id()`. @@ -1219,9 +1254,9 @@ that's the reason of such halfway description around here. Moreover, about this coming after `rb_define_class_id()`, -
+```c
 st_add_direct(rb_class_tbl, id, klass);
-
+``` This part assigns the class to the constant. However, whichever way you look at it you do not see that. In fact, top-level classes and modules @@ -1230,7 +1265,7 @@ separated from the other constants and regrouped in `rb_class_tbl()`. The split is slightly related to the GC. It's not essential. -h3. Class → name +### Class → name We understood how the class can be obtained from the class name, but how to do the opposite? By doing things like calling `p` or @@ -1240,16 +1275,16 @@ implemented? In fact this is done by `rb_name_class()` which already appeared a long time ago. The call is around the following: -
+```TODO-lang
 rb_define_class
     rb_define_class_id
         rb_name_class
-
+``` Let's look at its content: ▼ `rb_name_class()` -
+```c
  269  void
  270  rb_name_class(klass, id)
  271      VALUE klass;
@@ -1259,7 +1294,7 @@ Let's look at its content:
  275  }
 
 (variable.c)
-
+``` `__classid__` is another instance variable that can't be seen from Ruby. As only `VALUE`s can be put in the instance variable table, the @@ -1267,7 +1302,7 @@ Ruby. As only `VALUE`s can be put in the instance variable table, the That's how we are able to find the constant name from the class. -h3. Nested classes +### Nested classes So, in the case of classes defined at the top-level, we know how works the reciprocal link between name and class. What's left is the case of @@ -1276,7 +1311,7 @@ little more complicated. The function to define these nested classes is `rb_define_class_under()`. ▼ `rb_define_class_under()` -
+```c
  215  VALUE
  216  rb_define_class_under(outer, name, super)
  217      VALUE outer;
@@ -1310,7 +1345,7 @@ is `rb_define_class_under()`.
  245  }
 
 (class.c)
-
+``` The structure is like the one of `rb_define_class()`: before the call to `rb_define_class_id()` is the redefinition check, after is the @@ -1319,7 +1354,7 @@ half is pretty boringly similar to `rb_define_class()` so we'll skip it. In the second half, `rb_set_class_path()` is new. We're going to look at it. -h4. `rb_set_class_path()` +#### `rb_set_class_path()` This function gives the name `name` to the class `klass` nested in the class `under`. "class path" means a constant name including all the nesting @@ -1327,7 +1362,7 @@ information starting from top-level, for example "`Net::NetPrivate::Socket`". ▼ `rb_set_class_path()` -
+```c
  210  void
  211  rb_set_class_path(klass, under, name)
  212      VALUE klass, under;
@@ -1349,7 +1384,7 @@ information starting from top-level, for example
  226  }
 
 (variable.c)
-
+``` Everything except the last line is the construction of the class path, and the last line makes the class remember its own @@ -1358,25 +1393,25 @@ can't be seen from a Ruby program. In `rb_name_class()` there was `__classid__`, but `id` is different because it does not include nesting information (look at the table below). -
+```TODO-lang
 __classpath__    Net::NetPrivate::Socket
 __classid__                       Socket
-
+``` It means classes defined for example in `rb_defined_class()` all have `__classid__` or `__classpath__` defined. So to find `under`'s classpath we can look up in these instance variables. This is done by `rb_class_path()`. We'll omit its content. -h3. Nameless classes +### Nameless classes Contrary to what I have just said, there are in fact cases in which neither `__classpath__` nor `__classid__` are set. That is because in Ruby you can use a method like the following to create a class. -
+```ruby
 c = Class.new()
-
+``` If a class is created like this, it won't go through `rb_define_class_id()` and the classpath won't be set. In this case, @@ -1385,9 +1420,9 @@ If a class is created like this, it won't go through However, if later it's assigned to a constant, a name will be attached to the class at that moment. -
+```ruby
 SomeClass = c   # the class name is SomeClass
-
+``` Strictly speaking, at the first time requesting the name after assigning it to a constant, the name will be attached to the class. @@ -1396,25 +1431,26 @@ For instance, when calling `p` on this this, a value equal to the class is searched in `rb_class_tbl`, and a name has to be chosen. The following case can also happen: -
+```ruby
 class A
   class B
     C = tmp = Class.new()
     p(tmp)   # here we search for the name
   end
 end
-
+``` so in the worst case we have to search for the whole constant space. However, generally, there aren't many constants so even searching all constants does not take too much time. -h2. Include +Include +======= We only talked about classes so let's finish this chapter with something else and talk about module inclusion. -h3. `rb_include_module` (1) +### `rb_include_module` (1) Includes are done by the ordinary method `Module#include`. Its corresponding function in C is `rb_include_module()`. In fact, to be @@ -1423,18 +1459,18 @@ precise, its body is `rb_mod_include()`, and there implementation finally calls `rb_include_module()`. Mixing what's happening in Ruby and C gives us the following call graph. -
+```TODO-lang
 Module#include (rb_mod_include)
     Module#append_features (rb_mod_append_features)
         rb_include_module
-
+``` Anyway, the manipulations that are usually regarded as inclusions are done by `rb_include_module()`. This function is a little long so we'll look at it a half at a time. ▼ `rb_include_module` (first half) -
+```c
       /* include module in class */
  347  void
  348  rb_include_module(klass, module)
@@ -1461,13 +1497,13 @@ a little long so we'll look at it a half at a time.
  369      }
 
 (class.c)
-
+``` For the moment it's only security and type checking, therefore we can ignore it. The process itself is below: ▼ `rb_include_module` (second half) -
+```c
  371      OBJ_INFECT(klass, module);
  372      c = klass;
  373      while (module) {
@@ -1501,19 +1537,19 @@ ignore it. The process itself is below:
  400  }
 
 (class.c)
-
+``` First, what the (A) block does is written in the comment. It seems to be a special condition so let's first skip reading it for now. By extracting the important parts from the rest we get the following: -
+```c
 c = klass;
 while (module) {
     c = RCLASS(c)->super = include_class_new(module, RCLASS(c)->super);
     module = RCLASS(module)->super;
 }
-
+``` In other words, it's a repetition of `module`'s `super`. What is in `module`'s `super` must be a module included by `module` (because our @@ -1523,9 +1559,9 @@ what, but at the moment I saw that I felt "Ah, doesn't this look the addition of elements to a list (like LISP's cons)?" and it suddenly make the story faster. In other words it's the following form: -
+```TODO-lang
 list = new(item, list)
-
+``` Thinking about this, it seems we can expect that module is inserted between `c` and `c->super`. If it's like this, it fits module's @@ -1533,10 +1569,10 @@ specification. But to be sure of this we have to look at `include_class_new()`. -h3. `include_class_new()` +### `include_class_new()` ▼ `include_class_new()` -
+```c
  319  static VALUE
  320  include_class_new(module, super)
  321      VALUE module, super;
@@ -1566,7 +1602,7 @@ h3. `include_class_new()`
  345  }
 
 (class.c)
-
+``` We're lucky there's nothing we do not know. @@ -1585,7 +1621,10 @@ on, without duplicating the table. Later, if a method is added, the module's body and the include class will still have exactly the same methods (figure 11). -!images/ch_class_symbolic.png(Include class)! +
+ figure 11: Include class +
figure 11: Include class
+
If you look closely at (A), the structure type flag is set to T_ICLASS. This seems to be the mark of an include class. This @@ -1597,7 +1636,10 @@ And if you think about joining what this function and wrong. In brief, including is inserting the include class of a module between a class and its superclass (figure 12). -!images/ch_class_include.png(Include)! +
+ figure 12: Include +
figure 12: Include
+
At (D-2) the module is stored in the include class's `klass`. At (D-1), the module's body is taken out... I'd like to say so if possible, @@ -1613,21 +1655,24 @@ example calling a method on the include class would be very bad. So include classes must not be seen from Ruby programs. And in practice all methods skip include classes, with no exception. -h3. Simulation +### Simulation It was complicated so let's look at a concrete example. I'd like you to look at figure 13 (1). We have the `c1` class and the `m1` module that includes `m2`. From there, the changes made to include `m1` in `c1` are (2) and (3). `im`s are of course include classes. -!images/ch_class_simulate.png(Include)! +
+ figure 13: Include +
figure 13: Include
+
-h3. `rb_include_module` (2) +### `rb_include_module` (2) Well, now we can explain the part of `rb_include_module()` we skipped. ▼ `rb_include_module` (avoiding double inclusion) -
+```c
  378  /* (A) skip if the superclass already includes module */
  379  for (p = RCLASS(klass)->super; p; p = RCLASS(p)->super) {
  380      switch (BUILTIN_TYPE(p)) {
@@ -1646,7 +1691,7 @@ Well, now we can explain the part of `rb_include_module()` we skipped.
  393  }
 
 (class.c)
-
+``` Among the superclasses of the +klass+ (`p`), if a `p` is `T_ICLASS` (an include class) and has the same method table as the @@ -1662,7 +1707,7 @@ the modules included by it must also already be included... that's what I thought for a moment, but we can have the following context: -
+```ruby
 module M
 end
 module M2
@@ -1677,7 +1722,7 @@ end
 class C
   include M   # I would like here to only add M2
 end
-
+``` To say this conversely, there are cases that a result of `include` is not propagated soon. @@ -1687,4 +1732,3 @@ but in the case of module there is no such thing. Therefore the singleton methods of the module are not inherited by the including class (or module). When you want to also inherit singleton methods, the usual way is to override `Module#append_features`. - diff --git a/contextual.textile b/contextual.md similarity index 94% rename from contextual.textile rename to contextual.md index 4e1f025..6faa4cf 100644 --- a/contextual.textile +++ b/contextual.md @@ -4,14 +4,16 @@ title: Finite-state scanner --- Translated by Peter Zotov
-_I'm very grateful to my employer "Evil Martians":http://evl.ms , who sponsored -the work, and "Nikolay Konovalenko":mailto:nlkonovalenko@gmail.com , who put +_I'm very grateful to my employer [Evil Martians](http://evl.ms), who sponsored +the work, and [Nikolay Konovalenko](mailto:nlkonovalenko@gmail.com), who put more effort in this translation than I could ever wish for. Without them, I would be still figuring out what `COND_LEXPOP()` actually does._ -h1. Chapter 11 Finite-state scanner +Chapter 11 Finite-state scanner +------------------------------- -h2. Outline +Outline +======= In theory, the scanner and the parser are completely independent of each other – the scanner is supposed to recognize tokens, while the parser is supposed to @@ -21,17 +23,17 @@ is often necessary to alter the way tokens are recognized or their symbols. In this chapter we will take a look at the way the scanner and the parser cooperate. -h3. Practical examples +### Practical examples In most programming languages, spaces don’t have any specific meaning unless they are used to separate words. However, Ruby is not an ordinary language and meanings can change significantly depending on the presence of spaces. Here is an example -
+```ruby
 a[i] = 1      # a[i] = (1)
 a [i]         # a([i])
-
+``` The former is an example of assigning an index. The latter is an example of omitting the method call parentheses and passing a member of an array to a @@ -39,10 +41,10 @@ parameter. Here is another example. -
+```ruby
 a  +  1    # (a) + (1)
 a  +1      # a(+1)
-
+``` This seems to be really disliked by some. @@ -50,10 +52,10 @@ However, the above examples might give one the impression that only omitting the method call parentheses can be a source of trouble. Let’s look at a different example. -
+```ruby
 `cvs diff parse.y`          # command call string
 obj.`("cvs diff parse.y")   # normal method call
-
+``` Here, the former is a method call using a literal. In contrast, the latter is a normal method call (with ''' being the method name). Depending on the context, @@ -61,14 +63,14 @@ they could be handled quite differently. Below is another example where the functioning changes dramatically -
+```ruby
 print(<<EOS)   # here-document
 ......
 EOS
 
 list = []
 list << nil    # list.push(nil)
-</pre>
+```
 
 The former is a method call using a here-document. The latter is a method call
 using an operator.
@@ -78,7 +80,7 @@ implement in practice. I couldn’t realistically give a thorough description of
 all in just one chapter, so in this one I will look at the basic principles and
 those parts which present the most difficulty.
 
-h3. `lex_state`
+### `lex_state`
 
 There is a variable called “lex_state”. “lex”, obviously, stands for “lexer”.
 Thus, it is a variable which shows the scanner’s state.
@@ -86,7 +88,7 @@ Thus, it is a variable which shows the scanner’s state.
 What states are there? Let’s look at the definitions.
 
 ▼ `enum lex_state`
-
+```c
   61  static enum lex_state {
   62      EXPR_BEG,      /* ignore newline, +/- is a sign. */
   63      EXPR_END,      /* newline significant, +/- is a operator. */
@@ -100,12 +102,12 @@ What states are there? Let’s look at the definitions.
   71  } lex_state;
 
 (parse.y)
-
+``` The EXPR prefix stands for “expression”. `EXPR_BEG` is “Beginning of expression” and `EXPR_DOT` is “inside the expression, after the dot”. -To elaborate, `EXPR_BEG` denotes “Located at the head of the expression”. +To elaborate, `EXPR_BEG` denotes “Located at the head of the expression”. `EXPR_END` denotes “Located at the end of the expression”. `EXPR_ARG` denotes “Before the method parameter”. `EXPR_FNAME` denotes “Before the method name (such as `def`)”. The ones not covered here will be analyzed in detail below. @@ -126,7 +128,7 @@ look at the scanner as a state machine. However, delving there would be veering off topic and too tedious. I would refer any interested readers to any textbook on data structures. -h3. Understanding the finite-state scanner +### Understanding the finite-state scanner The trick to reading a finite-state scanner is to not try to grasp everything at once. Someone writing a parser would prefer not to use a finite-state @@ -146,7 +148,7 @@ finite-state scanner, that objective would undoubtedly be to understand every state. For example, what kind of state is `EXPR_BEG`? It is a state where the parser is at the head of the expression. -h4. The static approach +#### The static approach So, how can we understand what a state does? There are three basic approaches @@ -173,7 +175,10 @@ export their location, for example, such as `'#'` and `'*'` and `'!'` of `yylex()` Then we need to recall the state prior to the transition and consider which case suits best (see image 1) -!images/ch_contextual_transittobeg.jpg(Transition to `EXPR_BEG`)! +
+ figure 1: Transition to `EXPR_BEG` +
figure 1: Transition to `EXPR_BEG`
+
((errata:
1. Actually when the state is `EXPR_DOT`, the state after reading a @@ -190,7 +195,7 @@ This does indeed look like the head of statement. Especially the `'\n'` and the `';'` The open parentheses and the comma also suggest that it’s the head not just of the statement, but of the expression as well. -h4. The dynamic approach +#### The dynamic approach There are other easy methods to observe the functioning. For example, you can use a debugger to “hook” the `yylex()` and look at the `lex_state` @@ -205,7 +210,7 @@ The overall process looks like this: use a debugger or the aforementioned tool to observe the functioning of the program. Then look at the source code to confirm the acquired data and use it. -h3. Description of states +### Description of states Here I will give simple descriptions of `lex_state` states. @@ -269,9 +274,10 @@ They all express similar conditions. `EXPR_CLASS` is a little different, but only appears in a limited number of places, not warranting any special attention. -h2. Line-break handling +Line-break handling +=================== -h3. The problem +### The problem In Ruby, a statement does not necessarily require a terminator. In C or Java a statement must always end with a semicolon, but Ruby has no such requirement. @@ -288,7 +294,7 @@ follows: Etc. -h3. Implementation +### Implementation So, what do we need to implement this grammar? Simply having the scanner ignore line-breaks is not sufficient. In a grammar like Ruby’s, where statements are @@ -315,7 +321,7 @@ needs to continued, the `\n` will be ignored, and when it needs to be terminated, the `\n` is passed as a token. In the `yylex()` this is found here: ▼ `yylex()`-`'\n'` -
+```c
 3155        case '\n':
 3156          switch (lex_state) {
 3157            case EXPR_BEG:
@@ -331,7 +337,7 @@ terminated, the `\n` is passed as a token. In the `yylex()` this is found here:
 3167          return '\n';
 
 (parse.y)
-
+``` With `EXPR_BEG`, `EXPR_FNAME`, `EXPR_DOT`, `EXPR_CLASS` it will be `goto retry`. That is to say, it’s meaningless and shall be ignored. The label `retry` is @@ -346,7 +352,7 @@ trying to grasp too many things at once will only end in needless confusion. Let us now take a look at some examples using the `rubylex-analyser` tool. -
+```TODO-lang
 % rubylex-analyser -e '
 m(a,
   b, c) unless i
@@ -368,7 +374,7 @@ EXPR_END    S    "unless"  kUNLESS_MOD          EXPR_BEG
 EXPR_BEG    S         "i"  tIDENTIFIER          EXPR_ARG
 EXPR_ARG             "\n"  \n                   EXPR_BEG
 EXPR_BEG     C       "\n"  '                    EXPR_BEG
-
+``` As you can see, there is a lot of output here, but we only need the left and middle columns. The left column displays the `lex_state` before it enters the @@ -383,7 +389,7 @@ That is because the state is `EXPR_ARG` And that is how it should be used. Let us have another example. -
+```TODO-lang
 % rubylex-analyser -e 'class
 C < Object
 end'
@@ -396,12 +402,12 @@ EXPR_BEG    S    "Object"  tCONSTANT            EXPR_ARG
 EXPR_ARG             "\n"  \n                   EXPR_BEG
 EXPR_BEG     C      "end"  kEND                 EXPR_END
 EXPR_END             "\n"  \n                   EXPR_BEG
-
+``` The reserved word `class` is followed by `EXPR_CLASS` so the line-break is ignored. However, the superclass `Object` is followed by `EXPR_ARG`, so the `\n` appears. -
+```TODO-lang
 % rubylex-analyser -e 'obj.
 class'
 +EXPR_BEG
@@ -409,16 +415,17 @@ EXPR_BEG     C      "obj"  tIDENTIFIER          EXPR_CMDARG
 EXPR_CMDARG           "."  '.'                  EXPR_DOT
 EXPR_DOT        "\nclass"  tIDENTIFIER          EXPR_ARG
 EXPR_ARG             "\n"  \n                   EXPR_BEG
-
+``` `'.'` is followed by `EXPR_DOT` so the `\n` is ignored. Note that `class` becomes `tIDENTIFIER` despite being a reserved word. This is discussed in the next section. -h2. Reserved words and identical method names +Reserved words and identical method names +========================================= -h3. The problem +### The problem In Ruby, reserved words can used as method names. However, in actuality it’s not as simple as “it can be used” – there exist three possible contexts: @@ -449,12 +456,12 @@ the reserved word that comes after `def` or `.` or `:` For the latter, make that into a rule. Ruby allows for both solutions to be used in each of the three cases. -h3. Method definition +### Method definition The name part of the method definition. This is handled by the parser. ▼ Method definition rule -
+```TODO-lang
                 | kDEF fname
                   f_arglist
                   bodystmt
@@ -463,31 +470,31 @@ The name part of the method definition. This is handled by the parser.
                   f_arglist
                   bodystmt
                   kEND
-
+``` There exist only two rules for method definition – one for normal methods and one for singleton methods. For both, the name part is `fname` and it is defined as follows. ▼ `fname` -
+```TODO-lang
 fname           : tIDENTIFIER
                 | tCONSTANT
                 | tFID
                 | op
                 | reswords
-
+``` `reswords` is a reserved word and `op` is a binary operator. Both rules consist of simply all terminal symbols lined up, so I won’t go into detail here. Finally, for `tFID` the end contains symbols similarly to `gsub!` and `include?` -h3. Method call +### Method call Method calls with names identical to reserved words are handled by the scanner. The scan code for reserved words is shown below. -
+```TODO-lang
 Scanning the identifier
 result = (tIDENTIFIER or tCONSTANT)
 
@@ -498,19 +505,19 @@ if (lex_state != EXPR_DOT) {
     kw = rb_reserved_word(tok(), toklen());
     Reserved word is processed
 }
-
+``` `EXPR_DOT` expresses what comes after the method call dot. Under `EXPR_DOT` reserved words are universally not processed. The symbol for reserved words after the dot becomes either `tIDENTIFIER` or `tCONSTANT`. -h3. Symbols +### Symbols Reserved word symbols are handled by both the scanner and the parser. First, the rule. ▼ `symbol` -
+```TODO-lang
 symbol          : tSYMBEG sym
 
 sym             : fname
@@ -523,7 +530,7 @@ fname           : tIDENTIFIER
                 | tFID
                 | op
                 | reswords
-
+``` Reserved words (`reswords`) are explicitly passed through the parser. This is only possible because the special terminal symbol `tSYMBEG` is present at the @@ -536,7 +543,7 @@ scanner. ▼ `yylex`-`':'` -
+```TODO-lang
 3761        case ':':
 3762          c = nextc();
 3763          if (c == ':') {
@@ -559,7 +566,7 @@ scanner.
 3778          return tSYMBEG;
 
 (parse.y)
-
+``` This is a situation when the `if` in the first half has two consecutive `':'` In this situation, the `'::'`is scanned in accordance with the leftmost longest @@ -577,14 +584,15 @@ When none of the above applies, it’s all symbols. In that case, a transition t danger to parsing here, but if this is forgotten, the scanner will not pass values to reserved words and value calculation will be disrupted. -h2. Modifiers +Modifiers +========= -h3. The problem +### The problem For example, for `if` if there exists a normal notation and one for postfix modification. -
+```TODO-lang
 # Normal notation
 if cond then
   expr
@@ -592,36 +600,36 @@ end
 
 # Postfix
 expr if cond
-
+``` This could cause a conflict. The reason can be guessed – again, it’s because method parentheses have been omitted previously. Observe this example -
+```TODO-lang
 call if cond then a else b end
-
+``` Reading this expression up to the `if` gives us two possible interpretations. -
+```TODO-lang
 call((if ....))
 call() if ....
-
+``` When unsure, I recommend simply using trial and error and seeing if a conflict occurs. Let us try to handle it with `yacc` after changing `kIF_MOD` to `kIF` in the grammar. -
+```TODO-lang
 % yacc parse.y
 parse.y contains 4 shift/reduce conflicts and 13 reduce/reduce conflicts.
-
+``` As expected, conflicts are aplenty. If you are interested, you add the option `-v` to `yacc` and build a log. The nature of the conflicts should be shown there in great detail. -h3. Implementation +### Implementation So, what is there to do? In Ruby, on the symbol level (that is, on the scanner level) the normal `if` is distinguished from the postfix `if` by them being @@ -630,7 +638,7 @@ operators. In all, there are five - `kUNLESS_MOD kUNTIL_MOD kWHILE_MOD` `kRESCUE_MOD` and `kIF_MOD` The distinction is made here: ▼ `yylex`-Reserved word -
+```TODO-lang
 4173                  struct kwtable *kw;
 4174
 4175                  /* See if it is a reserved word.  */
@@ -659,7 +667,7 @@ operators. In all, there are five - `kUNLESS_MOD kUNTIL_MOD kWHILE_MOD`
 4198                  }
 
 (parse.y)
-
+``` This is located at the end of `yylex` after the identifiers are scanned. The part that handles modifiers is the last (innermost) `if`〜`else` Whether @@ -672,11 +680,11 @@ structure defined in `keywords` and the hash function `rb_reserved_word()` is created by `gperf`. I’ll show the structure here again. ▼ `keywords` - `struct kwtable` -
+```TODO-lang
    1  struct kwtable {char *name; int id[2]; enum lex_state state;};
 
 (keywords)
-
+``` I’ve already explained about `name` and `id[0]` - they are the reserved word name and its symbol. Here I will speak about the remaining members. @@ -691,7 +699,7 @@ should occur after the reserved word is read. Below is a list created in the `kwstat.rb` tool which I made. The tool can be found on the CD. -
+```TODO-lang
 % kwstat.rb ruby/keywords
 ---- EXPR_ARG
 defined?  super     yield
@@ -715,11 +723,12 @@ break   next    rescue  return
 
 ---- modifiers
 if      rescue  unless  until   while
-
+``` -h2. The `do` conflict +The `do` conflict +================= -h3. The problem +### The problem There are two iterator forms - `do`〜`end` and `{`〜`}` Their difference is in priority - `{`〜`}` has a much higher priority. A higher priority means that as @@ -730,34 +739,34 @@ in `stmt` By the way, there has been a request for an expression like this: -
+```TODO-lang
 m do .... end + m do .... end
-
+``` To allow for this, put the `do`〜`end` iterator in `arg` or `primary`. Incidentally, the condition for `while` is `expr`, meaning it contains `arg` and `primary`, so the `do` will cause a conflict here. Basically, it looks like this: -
+```TODO-lang
 while m do
   ....
 end
-
+``` At first glance, the `do` looks like the `do` of `while`. However, a closer look reveals that it could be a `m do`〜`end` bundling. Something that’s not obvious even to a person will definitely cause `yacc` to conflict. Let’s try it in practice. -
+```TODO-lang
 /* do conflict experiment */
 %token kWHILE kDO tIDENTIFIER kEND
 %%
 expr: kWHILE expr kDO expr kEND
     | tIDENTIFIER
     | tIDENTIFIER kDO expr kEND
-
+``` I simplified the example to only include `while`, variable referencing and iterators. This rule causes a shift/reduce conflict if the head of the @@ -776,12 +785,12 @@ However, not putting `do`〜`end` into `expr` is not a realistic goal. That would require all rules for `expr` (as well as for `arg` and `primary`) to be repeated. This leaves us only the scanner solution. -h3. Rule-level solution +### Rule-level solution Below is a simplified example of a relevant rule. ▼ `do` symbol -
+```TODO-lang
 primary         : kWHILE expr_value do compstmt kEND
 
 do              : term
@@ -792,21 +801,21 @@ primary         : operation brace_block
 
 brace_block     : '{' opt_block_var compstmt '}'
                 | kDO opt_block_var compstmt kEND
-
+``` As you can see, the terminal symbols for the `do` of `while` and for the iterator `do` are different. For the former it’s `kDO_COND` while for the latter it’s `kDO` Then it’s simply a matter of pointing that distinction out to the scanner. -h3. Symbol-level solution +### Symbol-level solution Below is a partial view of the `yylex` section that processes reserved words. It’s the only part tasked with processing `do` so looking at this code should be enough to understand the criteria for making the distinction. ▼ `yylex`-Identifier-Reserved word -
+```TODO-lang
 4183                      if (kw->id[0] == kDO) {
 4184                          if (COND_P()) return kDO_COND;
 4185                          if (CMDARG_P() && state != EXPR_CMDARG)
@@ -817,11 +826,11 @@ be enough to understand the criteria for making the distinction.
 4190                      }
 
 (parse.y)
-
+``` It’s a little messy, but you only need the part associated with `kDO_COND`. That is because only two comparisons are meaningful. -The first is the comparison between `kDO_COND` and `kDO`/`kDO_BLOCK` +The first is the comparison between `kDO_COND` and `kDO`/`kDO_BLOCK` The second is the comparison between `kDO` and `kDO_BLOCK`. The rest are meaningless. Right now we only need to distinguish the conditional `do` - leave all the @@ -829,14 +838,14 @@ other conditions alone. Basically, `COND_P()` is the key. -h3. `COND_P()` +### `COND_P()` -h4. `cond_stack` +#### `cond_stack` `COND_P()` is defined close to the head of `parse.y` ▼ `cond_stack` -
+```TODO-lang
   75  #ifdef HAVE_LONG_LONG
   76  typedef unsigned LONG_LONG stack_type;
   77  #else
@@ -854,7 +863,7 @@ h4. `cond_stack`
   89  #define COND_P() (cond_stack&1)
 
 (parse.y)
-
+``` The type `stack_type` is either `long` (over 32 bit) or `long long` (over 64 bit). `cond_stack` is initialized by `yycompile()` at the start of parsing and @@ -864,7 +873,7 @@ those macros. If you look at `COND_PUSH`/`POP` you will see that these macros use integers as stacks consisting of bits. -
+```TODO-lang
 MSB←   →LSB
 ...0000000000         Initial value 0
 ...0000000001         COND_PUSH(1)
@@ -873,7 +882,7 @@ MSB←   →LSB
 ...0000000010         COND_POP()
 ...0000000100         COND_PUSH(0)
 ...0000000010         COND_POP()
-
+``` As for `COND_P()`, since it determines whether or not the least significant bit (LSB) is a 1, it effectively determines whether the head of the stack is a 1. @@ -882,7 +891,7 @@ The remaining `COND_LEXPOP()` is a little weird. It leaves `COND_P()` at the head of the stack and executes a right shift. Basically, it “crushes” the second bit from the bottom with the lowermost bit. -
+```TODO-lang
 MSB←   →LSB
 ...0000000000         Initial value 0
 ...0000000001         COND_PUSH(1)
@@ -891,11 +900,11 @@ MSB←   →LSB
 ...0000000011         COND_LEXPOP()
 ...0000000100         COND_PUSH(0)
 ...0000000010         COND_LEXPOP()
-
+``` ((errata:
It leaves `COND_P()` only when it is 1. -When `COND_P()` is 0 and the second bottom bit is 1, +When `COND_P()` is 0 and the second bottom bit is 1, it would become 1 after doing LEXPOP, thus `COND_P()` is not left in this case. )) @@ -903,12 +912,12 @@ thus `COND_P()` is not left in this case. Now I will explain what that means. -h4. Investigating the function +#### Investigating the function Let us investigate the function of this stack. To do that I will list up all the parts where `COND_PUSH() COND_POP()` are used. -
+```TODO-lang
         | kWHILE {COND_PUSH(1);} expr_value do {COND_POP();}
 --
         | kUNTIL {COND_PUSH(1);} expr_value do {COND_POP();}
@@ -938,7 +947,7 @@ the parts where `COND_PUSH() COND_POP()` are used.
       case ')':
         COND_LEXPOP();
         CMDARG_LEXPOP();
-
+``` From this we can derive the following general rules @@ -951,16 +960,19 @@ With this, you should see how to use it. If you think about it for a minute, the name `cond_stack` itself is clearly the name for a macro that determines whether or not it’s on the same level as the conditional expression (see image 2) -!images/ch_contextual_condp.jpg(Changes of `COND_P()`)! +
+ figure 2: Changes of `COND_P()` +
figure 2: Changes of `COND_P()`
+
Using this trick should also make situations like the one shown below easy to deal with. -
+```TODO-lang
 while (m do .... end)   # do is an iterator do(kDO)
   ....
 end
-
+``` This means that on a 32-bit machine in the absence of `long long` if conditional expressions or parentheses are nested at 32 levels, things could @@ -973,23 +985,24 @@ lookahead to occur, so there’s no purpose to make the distinction between `POP and `LEXPOP`. Basically, at this time it would be correct to say that `COND_LEXPOP()` has no meaning. -h2. `tLPAREN_ARG`(1) +`tLPAREN_ARG`(1) +================ -h3. The problem +### The problem This one is very complicated. It only became workable in Ruby 1.7 and only fairly recently. The core of the issue is interpreting this: -
+```TODO-lang
 call (expr) + 1
-
+``` As one of the following -
+```TODO-lang
 (call(expr)) + 1
 call((expr) + 1)
-
+``` In the past, it was always interpreted as the former. That is, the parentheses were always treated as “Method parameter parentheses”. But since Ruby 1.7 it @@ -999,28 +1012,28 @@ the parentheses become “Parentheses of `expr`” I will also provide an example to explain why the interpretation changed. First, I wrote a statement as follows -
+```TODO-lang
 p m() + 1
-
+``` So far so good. But let’s assume the value returned by `m` is a fraction and there are too many digits. Then we will have it displayed as an integer. -
+```TODO-lang
 p m() + 1 .to_i   # ??
-
+``` Uh-oh, we need parentheses. -
+```TODO-lang
 p (m() + 1).to_i
-
+``` How to interpret this? Up to 1.6 it will be this -
+```TODO-lang
 (p(m() + 1)).to_i
-
+``` The much-needed `to_i` is rendered meaningless, which is unacceptable. To counter that, adding a space between it and the parentheses will cause the @@ -1031,37 +1044,37 @@ revision 1.100(2001-05-31). Thus, it should be relatively prominent when looking at the differences between it and 1.99. This is the command to find the difference. -
+```TODO-lang
 ~/src/ruby % cvs diff -r1.99 -r1.100 parse.y
-
+``` -h3. Investigation +### Investigation First let us look at how the set-up works in reality. Using the `ruby-lexer` tool{`ruby-lexer`: located in `tools/ruby-lexer.tar.gz` on the CD} we can look at the list of symbols corresponding to the program. -
+```TODO-lang
 % ruby-lexer -e 'm(a)'
 tIDENTIFIER '(' tIDENTIFIER ')' '\n'
-
+``` Similarly to Ruby, `-e` is the option to pass the program directly from the command line. With this we can try all kinds of things. Let’s start with the problem at hand – the case where the first parameter is enclosed in parentheses. -
+```TODO-lang
 % ruby-lexer -e 'm (a)'
 tIDENTIFIER tLPAREN_ARG tIDENTIFIER ')' '\n'
-
+``` After adding a space, the symbol of the opening parenthesis became `tLPAREN_ARG`. Now let’s look at normal expression parentheses. -
+```TODO-lang
 % ruby-lexer -e '(a)'
 tLPAREN tIDENTIFIER ')' '\n'
-
+``` For normal expression parentheses it seems to be `tLPAREN`. To sum up: @@ -1073,12 +1086,12 @@ For normal expression parentheses it seems to be `tLPAREN`. To sum up: Thus the focus is distinguishing between the three. For now `tLPAREN_ARG` is the most important. -h3. The case of one parameter +### The case of one parameter We’ll start by looking at the `yylex()` section for `'('` ▼ `yylex`-`'('` -
+```TODO-lang
 3841        case '(':
 3842          command_start = Qtrue;
 3843          if (lex_state == EXPR_BEG || lex_state == EXPR_MID) {
@@ -1099,7 +1112,7 @@ We’ll start by looking at the `yylex()` section for `'('`
 3858          return c;
 
 (parse.y)
-
+``` Since the first `if` is `tLPAREN` we’re looking at a normal expression parenthesis. The distinguishing feature is that `lex_state` is either `BEG` or @@ -1110,10 +1123,10 @@ If there is a space and `lex_state` is either `ARG` or `CMDARG`, basically if it’s before the first parameter, the symbol is not `'('` but `tLPAREN_ARG`. This way, for example, the following situation can be avoided -
+```TODO-lang
 m(              # Parenthesis not preceded by a space. Method parenthesis ('(')
 m arg, (        # Unless first parameter, expression parenthesis (tLPAREN)
-
+``` When it is neither `tLPAREN` nor `tLPAREN_ARG`, the input character `c` is used as is and becomes `'('`. This will definitely be a method call parenthesis. @@ -1122,7 +1135,7 @@ If such a clear distinction is made on the symbol level, no conflict should occur even if rules are written as usual. Simplified, it becomes something like this: -
+```TODO-lang
 stmt         : command_call
 
 method_call  : tIDENTIFIER '(' args ')'    /* Normal method */
@@ -1139,23 +1152,26 @@ arg          : primary
 primary      : tLPAREN compstmt ')'        /* Normal expression parenthesis */
              | tLPAREN_ARG expr ')'        /* First parameter enclosed in parentheses */
              | method_call
-
+``` Now I need you to focus on `method_call` and `command_call` If you leave the `'('` without introducing `tLPAREN_ARG`, then `command_args` will produce `args`, `args` will produce `arg`, `arg` will produce `primary`. Then, `'('` will appear from `tLPAREN_ARG` and conflict with `method_call` (see image 3) -!images/ch_contextual_trees.jpg(`method_call` and `command_call`)! +
+ figure 3: `method_call` and `command_call` +
figure 3: `method_call` and `command_call`
+
-h3. The case of two parameters and more +### The case of two parameters and more One might think that if the parenthesis becomes `tLPAREN_ARG` all will be well. That is not so. For example, consider the following -
+```TODO-lang
 m (a, a, a)
-
+``` Before now, expressions like this one were treated as method calls and did not produce errors. However, if `tLPAREN_ARG` is introduced, the opening @@ -1165,13 +1181,13 @@ of compatibility. Unfortunately, rushing ahead and just adding a rule like -
+```TODO-lang
 command_args : tLPAREN_ARG args ')'
-
+``` will just cause a conflict. Let’s look at the bigger picture and think carefully. -
+```TODO-lang
 stmt         : command_call
              | expr
 
@@ -1192,16 +1208,16 @@ primary      : tLPAREN compstmt ')'
              | method_call
 
 method_call  : tIDENTIFIER '(' args ')'
-
+``` Look at the first rule of `command_args` Here, `args` produces `arg` Then `arg` produces `primary` and out of there comes the `tLPAREN_ARG` rule. And since `expr` contains `arg` and as it is expanded, it becomes like this: -
+```TODO-lang
 command_args : tLPAREN_ARG arg ')'
              | tLPAREN_ARG arg ')'
-
+``` This is a reduce/reduce conflict, which is very bad. @@ -1210,7 +1226,7 @@ have to write to accommodate for that situation specifically. In practice, it’ solved like this: ▼ `command_args` -
+```TODO-lang
 command_args    : open_args
 
 open_args       : call_args
@@ -1246,7 +1262,7 @@ primary         : literal
                 | xstring
                        :
                 | tLPAREN_ARG expr  ')'
-
+``` Here `command_args` is followed by another level - `open_args` which may not be reflected in the rules without consequence. The key is the second and third @@ -1259,9 +1275,9 @@ come out of `expr` it cannot conflict anyway. That wasn’t a very good explanation. To put it simply, in a grammar where this: -
+```TODO-lang
 command_args    : call_args
-
+``` doesn’t work, and only in such a grammar, the next rule is used to make an addition. Thus, the best way to think here is “In what kind of grammar would @@ -1271,23 +1287,23 @@ limited further and the best way to think is “In what kind of grammar does thi rule not work when a `tIDENTIFIER tLPAREN_ARG` line appears?” Below are a few examples. -
+```TODO-lang
 m (a, a)
-
+``` This is a situation when the `tLPAREN_ARG` list contains two or more items. -
+```TODO-lang
 m ()
-
+``` Conversely, this is a situation when the `tLPAREN_ARG` list is empty. -
+```TODO-lang
 m (*args)
 m (&block)
 m (k => v)
-
+``` This is a situation when the `tLPAREN_ARG` list contains a special expression (one not present in `expr`). @@ -1296,15 +1312,15 @@ This should be sufficient for most cases. Now let’s compare the above with a practical implementation. ▼ `open_args`(1) -
+```TODO-lang
 open_args       : call_args
                 | tLPAREN_ARG   ')'
-
+``` First, the rule deals with empty lists ▼ `open_args`(2) -
+```TODO-lang
                 | tLPAREN_ARG call_args2  ')'
 
 call_args2      : arg_value ',' args opt_block_arg
@@ -1320,35 +1336,36 @@ call_args2      : arg_value ',' args opt_block_arg
                                   tSTAR arg_value opt_block_arg
                 | tSTAR arg_value opt_block_arg
                 | block_arg
-
+``` And `call_args2` deals with elements containing special types such as `assocs`, passing of arrays or passing of blocks. With this, the scope is now sufficiently broad. -h2. `tLPAREN_ARG`(2) +`tLPAREN_ARG`(2) +================ -h3. The problem +### The problem In the previous section I said that the examples provided should be sufficient for “most” special method call expressions. I said “most” because iterators are still not covered. For example, the below statement will not work: -
+```TODO-lang
 m (a) {....}
 m (a) do .... end
-
+``` In this section we will once again look at the previously introduced parts with solving this problem in mind. -h3. Rule-level solution +### Rule-level solution Let us start with the rules. The first part here is all familiar rules, so focus on the `do_block` part ▼ `command_call` -
+```TODO-lang
 command_call    : command
                 | block_command
 
@@ -1366,52 +1383,52 @@ block_call      : command do_block
 
 do_block        : kDO_BLOCK opt_block_var compstmt '}'
                 | tLBRACE_ARG opt_block_var compstmt '}'
-
+``` Both `do` and `{` are completely new symbols `kDO_BLOCK` and `tLBRACE_ARG`. Why isn’t it `kDO` or `'{'` you ask? In this kind of situation the best answer is an experiment, so we will try replacing `kDO_BLOCK` with `kDO` and `tLBRACE_ARG` with `'{'` and processing that with `yacc` -
+```TODO-lang
 % yacc parse.y
 conflicts:  2 shift/reduce, 6 reduce/reduce
-
+``` It conflicts badly. A further investigation reveals that this statement is the cause. -
+```TODO-lang
 m (a), b {....}
-
+``` That is because this kind of statement is already supposed to work. `b{....}` becomes `primary`. And now a rule has been added that concatenates the block with `m` That results in two possible interpretations: -
+```TODO-lang
 m((a), b) {....}
 m((a), (b {....}))
-
+``` This is the cause of the conflict – namely, a 2 shift/reduce conflict. The other conflict has to do with `do`〜`end` -
+```TODO-lang
 m((a)) do .... end     # Add do〜end using block_call
 m((a)) do .... end     # Add do〜end using primary
-
+``` These two conflict. This is 6 reduce/reduce conflict. -h3. `{`〜`}` iterator +### `{`〜`}` iterator This is the important part. As shown previously, you can avoid a conflict by changing the `do` and `'{'` symbols. ▼ `yylex`-`'{'` -
+```TODO-lang
 3884        case '{':
 3885          if (IS_ARG() || lex_state == EXPR_END)
 3886              c = '{';          /* block (primary) */
@@ -1425,34 +1442,34 @@ changing the `do` and `'{'` symbols.
 3894          return c;
 
 (parse.y)
-
+``` `IS_ARG()` is defined as ▼ `IS_ARG` -
+```TODO-lang
 3104  #define IS_ARG() (lex_state == EXPR_ARG || lex_state == EXPR_CMDARG)
 
 (parse.y)
-
+``` Thus, when the state is `EXPR_ENDARG` it will always be false. In other words, when `lex_state` is `EXPR_ENDARG`, it will always become `tLBRACE_ARG`, so the key to everything is the transition to `EXPR_ENDARG`. -h4. `EXPR_ENDARG` +#### `EXPR_ENDARG` Now we need to know how to set `EXPR_ENDARG` I used `grep` to find where it is assigned. ▼ Transition to`EXPR_ENDARG` -
+```TODO-lang
 open_args       : call_args
                 | tLPAREN_ARG  {lex_state = EXPR_ENDARG;} ')'
                 | tLPAREN_ARG call_args2 {lex_state = EXPR_ENDARG;} ')'
 
 primary         : tLPAREN_ARG expr {lex_state = EXPR_ENDARG;} ')'
-
+``` That’s strange. One would expect the transition to `EXPR_ENDARG` to occur after the closing parenthesis corresponding to `tLPAREN_ARG`, but it’s actually @@ -1462,7 +1479,7 @@ other parts setting the `EXPR_ENDARG` but found nothing. Maybe there’s some mistake. Maybe `lex_state` is being changed some other way. Let’s use `rubylex-analyser` to visualize the `lex_state` transition. -
+```TODO-lang
 % rubylex-analyser -e 'm (a) { nil }'
 +EXPR_BEG
 EXPR_BEG     C        "m"  tIDENTIFIER          EXPR_CMDARG
@@ -1484,7 +1501,7 @@ EXPR_END    S         "}"  '}'                  EXPR_END
                                               0:cond lexpop
                                               0:cmd lexpop
 EXPR_END             "\n"  \n                   EXPR_BEG
-
+``` The three big branching lines show the state transition caused by `yylex()`. On the left is the state before `yylex()` The middle two are the word text and @@ -1497,12 +1514,12 @@ for some reason an action is executed after reading the `')'` a transition to actually a pretty high-level technique – generously (ab)using the LALR(1) up to the (1). -h4. Abusing the lookahead +#### Abusing the lookahead `ruby -y` can bring up a detailed display of the `yacc` parser engine. This time we will use it to more closely trace the parser. -
+```TODO-lang
 % ruby -yce 'm (a) {nil}' 2>&1 | egrep '^Reading|Reducing'
 Reducing via rule 1 (line 303),  -> @1
 Reading a token: Next token is 304 (tIDENTIFIER)
@@ -1521,7 +1538,7 @@ Reducing via rule 261 (line 1317), tLPAREN_ARG expr @9 ')'  -> primary
 Reading a token: Next token is 344 (tLBRACE_ARG)
                          :
                          :
-
+``` Here we’re using the option `-c` which stops the process at just compiling and `-e` which allows to give a program from the command line. And we’re using @@ -1532,22 +1549,22 @@ Start by looking at the middle of the list. `')'` is read. Now look at the end this would allow `EXPR_ENDARG ` to be set after the `')'` before the `'{'` But is this always the case? Let’s take another look at the part where it’s set. -
+```TODO-lang
 Rule 1    tLPAREN_ARG  {lex_state = EXPR_ENDARG;} ')'
 Rule 2    tLPAREN_ARG call_args2 {lex_state = EXPR_ENDARG;} ')'
 Rule 3    tLPAREN_ARG expr {lex_state = EXPR_ENDARG;} ')'
-
+``` The embedding action can be substituted with an empty rule. For example, we can rewrite this using rule 1 with no change in meaning whatsoever. -
+```TODO-lang
 target  : tLPAREN_ARG tmp ')'
 tmp     :
             {
                 lex_state = EXPR_ENDARG;
             }
-
+``` Assuming that this is before `tmp`, it’s possible that one terminal symbol will be read by lookahead. Thus we can skip the (empty) `tmp` and read the next. @@ -1555,20 +1572,20 @@ And if we are certain that lookahead will occur, the assignment to `lex_state` is guaranteed to change to `EXPR_ENDARG` after `')'` But is `')'` certain to be read by lookahead in this rule? -h4. Ascertaining lookahead +#### Ascertaining lookahead This is actually pretty clear. Think about the following input. -
+```TODO-lang
 m () { nil }        # A
 m (a) { nil }       # B
 m (a,b,c) { nil }   # C
-
+``` I also took the opportunity to rewrite the rule to make it easier to understand (with no actual changes). -
+```TODO-lang
 rule1: tLPAREN_ARG             e1  ')'
 rule2: tLPAREN_ARG  one_arg    e2  ')'
 rule3: tLPAREN_ARG  more_args  e3  ')'
@@ -1576,13 +1593,13 @@ rule3: tLPAREN_ARG  more_args  e3  ')'
 e1:   /* empty */
 e2:   /* empty */
 e3:   /* empty */
-
+``` First, the case of input A. Reading up to -
+```TODO-lang
 m (         # ... tLPAREN_ARG
-
+``` we arrive before the `e1`. If `e1` is reduced here, another rule cannot be chosen anymore. Thus, a lookahead occurs to confirm whether to reduce `e1` and @@ -1592,16 +1609,16 @@ by lookahead. On to input B. First, reading up to here -
+```TODO-lang
 m (         # ... tLPAREN_ARG
-
+``` Here a lookahead occurs for the same reason as described above. Further reading up to here -
+```TODO-lang
 m (a        # ... tLPAREN_ARG '(' tIDENTIFIER
-
+``` Another lookahead occurs. It occurs because depending on whether what follows is a `','` or a `')'` a decision is made between `rule2` and `rule3` If what @@ -1619,9 +1636,9 @@ should be excluded when building a parser as it is a conflict. Proceeding to input C. -
+```TODO-lang
 m (a, b, c
-
+``` At this point anything other than `rule3` is unlikely so we’re not expecting a lookahead. And yet, that is wrong. If the following is `'('` then it’s a method @@ -1632,9 +1649,9 @@ elements instead of embedding action reduction. But what about the other inputs? For example, what if the third parameter is a method call? -
+```TODO-lang
 m (a, b, c(....)    # ... ',' method_call
-
+``` Once again a lookahead is necessary because a choice needs to be made between shift and reduction depending on whether what follows is `','` or `')'`. Thus, @@ -1644,42 +1661,42 @@ executed. This is quite complicated and more than a little impressive. But would it be possible to set `lex_state` using a normal action instead of an embedding action? For example, like this: -
+```TODO-lang
                 | tLPAREN_ARG ')' { lex_state = EXPR_ENDARG; }
-
+``` This won’t do because another lookahead is likely to occur before the action is reduced. This time the lookahead works to our disadvantage. With this it should be clear that abusing the lookahead of a LALR parser is pretty tricky and not something a novice should be doing. -h3. `do`〜`end` iterator +### `do`〜`end` iterator So far we’ve dealt with the `{`〜`}` iterator, but we still have `do`〜`end` left. Since they’re both iterators, one would expect the same solutions to work, but it isn’t so. The priorities are different. For example, -
+```TODO-lang
 m a, b {....}          # m(a, (b{....}))
 m a, b do .... end     # m(a, b) do....end
-
+``` Thus it’s only appropriate to deal with them differently. That said, in some situations the same solutions do apply. The example below is one such situation -
+```TODO-lang
 m (a) {....}
 m (a) do .... end
-
+``` In the end, our only option is to look at the real thing. Since we’re dealing with `do` here, we should look in the part of `yylex()` that handles reserved words. ▼ `yylex`-Identifiers-Reserved words-`do` -
+```TODO-lang
 4183                      if (kw->id[0] == kDO) {
 4184                          if (COND_P()) return kDO_COND;
 4185                          if (CMDARG_P() && state != EXPR_CMDARG)
@@ -1690,7 +1707,7 @@ that handles reserved words.
 4190                      }
 
 (parse.y)
-
+``` This time we only need the part that distinguishes between `kDO_BLOCK` and `kDO`. Ignore `kDO_COND` Only look at what’s always relevant in a finite-state scanner. @@ -1703,19 +1720,19 @@ action is probably to make it `kDO_BLOCK` In the following case, priorities should have an influence. (But it does not in the actual code. It means this is a bug.) -
-m m (a) { ... } # This should be interpreted as m(m(a) {...}), 
+```TODO-lang
+m m (a) { ... } # This should be interpreted as m(m(a) {...}),
                 # but is interpreted as m(m(a)) {...}
-m m (a) do ... end # as the same as this: m(m(a)) do ... end 
-
+m m (a) do ... end # as the same as this: m(m(a)) do ... end +``` )) The problem lies with `CMDARG_P()` and `EXPR_CMDARG`. Let’s look at both. -h4. `CMDARG_P()` +#### `CMDARG_P()` ▼ `cmdarg_stack` -
+```TODO-lang
   91  static stack_type cmdarg_stack = 0;
   92  #define CMDARG_PUSH(n) (cmdarg_stack = (cmdarg_stack<<1)|((n)&1))
   93  #define CMDARG_POP() (cmdarg_stack >>= 1)
@@ -1727,14 +1744,14 @@ h4. `CMDARG_P()`
   99  #define CMDARG_P() (cmdarg_stack&1)
 
 (parse.y)
-
+``` The structure and interface (macro) of `cmdarg_stack` is completely identical to `cond_stack`. It’s a stack of bits. Since it’s the same, we can use the same means to investigate it. Let’s list up the places which use it. First, during the action we have this: -
+```TODO-lang
 command_args    :  {
                         $$ = cmdarg_stack;
                         CMDARG_PUSH(1);
@@ -1745,7 +1762,7 @@ command_args    :  {
                         cmdarg_stack = $1;
                         $$ = $2;
                     }
-
+``` `$$` represents the left value with a forced casting. In this case it comes out as the value of the embedding action itself, so it can be produced in @@ -1767,13 +1784,13 @@ Consider both, and it can be said that when `command_args` , a parameter for a method call with parentheses omitted, is not enclosed in parentheses `CMDARG_P()` is true. -h4. `EXPR_CMDARG` +#### `EXPR_CMDARG` Now let’s take a look at one more condition - `EXPR_CMDARG` Like before, let us look for place where a transition to `EXPR_CMDARG` occurs. ▼ `yylex`-Identifiers-State Transitions -
+```TODO-lang
 4201              if (lex_state == EXPR_BEG ||
 4202                  lex_state == EXPR_MID ||
 4203                  lex_state == EXPR_DOT ||
@@ -1789,7 +1806,7 @@ Like before, let us look for place where a transition to `EXPR_CMDARG` occurs.
 4213              }
 
 (parse.y)
-
+``` This is code that handles identifiers inside `yylex()` Leaving aside that there are a bunch of `lex_state` tests in here, let’s look @@ -1797,7 +1814,7 @@ first at `cmd_state` And what is this? ▼ `cmd_state` -
+```TODO-lang
 3106  static int
 3107  yylex()
 3108  {
@@ -1813,7 +1830,7 @@ And what is this?
 3134      command_start = Qfalse;
 
 (parse.y)
-
+``` Turns out it’s a `yylex` local variable. Furthermore, an investigation using `grep` revealed that this is the only place where its value is altered. This @@ -1823,7 +1840,7 @@ single run of `yylex` When does `command_start` become true, then? ▼ `command_start` -
+```TODO-lang
 2327  static int command_start = Qtrue;
 
 2334  static NODE*
@@ -1851,7 +1868,7 @@ When does `command_start` become true, then?
 3842          command_start = Qtrue;
 
 (parse.y)
-
+``` From this we understand that `command_start` becomes true when one of the `parse.y` static variables `\n ; (` is scanned. @@ -1863,7 +1880,7 @@ becomes true. And here is the code in `yylex()` that uses `cmd_state` ▼ `yylex`-Identifiers-State transitions -
+```TODO-lang
 4201              if (lex_state == EXPR_BEG ||
 4202                  lex_state == EXPR_MID ||
 4203                  lex_state == EXPR_DOT ||
@@ -1879,7 +1896,7 @@ And here is the code in `yylex()` that uses `cmd_state`
 4213              }
 
 (parse.y)
-
+``` From this we understand the following: when after `\n ; (` the state is `EXPR_BEG MID DOT ARG CMDARG` and an identifier is read, a transition to @@ -1892,48 +1909,48 @@ Based on the above we can now think of a situation where the state is `EXPR_CMDARG`. For example, see the one below. The underscore is the current position. -
+```TODO-lang
 m _
 m(m _
 m m _
-
+``` ((errata:
The third one "m m _" is not `EXPR_CMDARG`. (It is `EXPR_ARG`.) )) -h4. Conclusion +#### Conclusion Let us now return to the `do` decision code. ▼ `yylex`-Identifiers-Reserved words-`kDO`-`kDO_BLOCK` -
+```TODO-lang
 4185                          if (CMDARG_P() && state != EXPR_CMDARG)
 4186                              return kDO_BLOCK;
 
 (parse.y)
-
+``` Inside the parameter of a method call with parentheses omitted but not before the first parameter. That means from the second parameter of `command_call` onward. Basically, like this: -
+```TODO-lang
 m arg, arg do .... end
 m (arg), arg do .... end
-
+``` Why is the case of `EXPR_CMDARG` excluded? This example should clear it up -
+```TODO-lang
 m do .... end
-
+``` This pattern can already be handled using the `do`〜`end` iterator which uses `kDO` and is defined in `primary` Thus, including that case would cause another conflict. -h3. Reality and truth +### Reality and truth Did you think we’re done? Not yet. Certainly, the theory is now complete, but only if everything that has been @@ -1950,7 +1967,7 @@ inside the parameter of a method call with parentheses omitted. But where exactly is “inside the parameter of a method call with parentheses omitted”? Once again, let us use `rubylex-analyser` to inspect in detail. -
+```TODO-lang
 % rubylex-analyser -e  'm a,a,a,a;'
 +EXPR_BEG
 EXPR_BEG     C        "m"  tIDENTIFIER          EXPR_CMDARG
@@ -1965,7 +1982,7 @@ EXPR_BEG              "a"  tIDENTIFIER          EXPR_ARG
 EXPR_ARG              ";"  ';'                  EXPR_BEG
                                               0:cmd resume
 EXPR_BEG     C       "\n"  '                    EXPR_BEG
-
+``` The `1:cmd push-` in the right column is the push to `cmdarg_stack`. When the rightmost digit in that line is 1, `CMDARG_P()` becomes true. To sum up, the @@ -1978,7 +1995,7 @@ To the terminal symbol following the final parameter But, very strictly speaking, even this is still not entirely accurate. -
+```TODO-lang
 % rubylex-analyser -e  'm a(),a,a;'
 +EXPR_BEG
 EXPR_BEG     C        "m"  tIDENTIFIER          EXPR_CMDARG
@@ -1997,7 +2014,7 @@ EXPR_BEG              "a"  tIDENTIFIER          EXPR_ARG
 EXPR_ARG              ";"  ';'                  EXPR_BEG
                                               0:cmd resume
 EXPR_BEG     C       "\n"  '                    EXPR_BEG
-
+``` When the first terminal symbol of the first parameter has been read, `CMDARG_P()` is true. Therefore, the complete answer would be: @@ -2010,12 +2027,12 @@ To the terminal symbol following the final parameter What repercussions does this fact have? Recall the code that uses `CMDARG_P()` ▼ `yylex`-Identifiers-Reserved words-`kDO`-`kDO_BLOCK` -
+```TODO-lang
 4185                          if (CMDARG_P() && state != EXPR_CMDARG)
 4186                              return kDO_BLOCK;
 
 (parse.y)
-
+``` `EXPR_CMDARG` stands for “Before the first parameter of `command_call`” and is excluded. But wait, this meaning is also included in `CMDARG_P()`. @@ -2053,13 +2070,13 @@ dynamic analyses are done so many times. )) -h4. Still not the end +#### Still not the end Another thing I forgot. I can’t end the chapter without explaining why `CMDARG_P()` takes that value. Here’s the problematic part: ▼ `command_args` -
+```TODO-lang
 1209  command_args    :  {
 1210                          $$ = cmdarg_stack;
 1211                          CMDARG_PUSH(1);
@@ -2074,14 +2091,14 @@ Another thing I forgot. I can’t  end the chapter without explaining why
 1221  open_args       : call_args
 
 (parse.y)
-
+``` All things considered, this looks like another influence from lookahead. `command_args` is always in the following context: -
+```TODO-lang
 tIDENTIFIER _
-
+``` Thus, this looks like a variable reference or a method call. If it’s a variable reference, it needs to be reduced to `variable` and if it’s a method call it @@ -2093,7 +2110,7 @@ read, `CMDARG_PUSH()` is executed. The reason why `POP` and `LEXPOP` exist separately in `cmdarg_stack` is also here. Observe the following example: -
+```TODO-lang
 % rubylex-analyser -e 'm m (a), a'
 -e:1: warning: parenthesize argument(s) for future version
 +EXPR_BEG
@@ -2114,18 +2131,18 @@ EXPR_BEG    S         "a"  tIDENTIFIER          EXPR_ARG
 EXPR_ARG             "\n"  \n                   EXPR_BEG
                                              10:cmd resume
                                               0:cmd resume
-
+``` Looking only at the parts related to `cmd` and how they correspond to each other… -
+```TODO-lang
   1:cmd push-       parserpush(1)
  10:cmd push        scannerpush
 101:cmd push-       parserpush(2)
  11:cmd lexpop      scannerpop
  10:cmd resume      parserpop(2)
   0:cmd resume      parserpop(1)
-
+``` The `cmd push-` with a minus sign at the end is a parser push. Basically, `push` and `pop` do not correspond. Originally there were supposed to be two diff --git a/evaluator.textile b/evaluator.md similarity index 96% rename from evaluator.textile rename to evaluator.md index b9670b7..b051fba 100644 --- a/evaluator.textile +++ b/evaluator.md @@ -5,10 +5,11 @@ title: "Chapter 13: Structure of the evaluator" h1(#chapter). Chapter 13: Structure of the evaluator -h2. Outline +Outline +======= -h3. Interface +### Interface We are not familiar with the word "Hyo-ka-ki" (evaluator). Literally, it must @@ -57,7 +58,7 @@ I'd like you to remember. -h3. The characteristics of @ruby's@ evaluator. +### The characteristics of @ruby's@ evaluator. The biggest characteristic of `ruby`'s evaluator is that, as this is also of @@ -84,10 +85,10 @@ it is named after "evaluate". By using it, you can even do something like this: -
+```TODO-lang
 lvar = 1
 answer = eval("lvar + lvar")    # the answer is 2
-
+``` There are also @Module#module_eval@ and @Object#instance_eval@, each method @@ -95,7 +96,7 @@ behaves slightly differently. I'll describe about them in detail in Chapter 17: -h3. @eval.c@ +### @eval.c@ The evaluator is implemented in @eval.c@. However, this @eval.c@ is a @@ -169,10 +170,10 @@ The below table shows the corresponding chapter of each of them: -h3. From @main@ by way of @ruby_run@ to @rb_eval@ +### From @main@ by way of @ruby_run@ to @rb_eval@ -h3. Call Graph +### Call Graph The true core of the evaluator is a function called @rb_eval()@. @@ -180,7 +181,7 @@ In this chapter, we will follow the path from @main()@ to that @rb_eval()@. First of all, here is a rough call graph around @rb_eval@ : -
+```TODO-lang
 main                     ....main.c
     ruby_init                ....eval.c
         ruby_prog_init           ....ruby.c
@@ -191,7 +192,7 @@ main                     ....main.c
             rb_eval
                 *
         ruby_stop
-
+``` I put the file names on the right side when moving to another file. @@ -220,11 +221,11 @@ Actually, @PUSH_TAG@ can only be used as a pair with @POP_TAG@ as follows: -
+```TODO-lang
 PUSH_TAG();
 /* do lots of things */
 POP_TAG();
-
+``` Because of its implementation, the two macros should be put into the same function. It's possible to implement in a way to be able to divide them into different functions, @@ -264,7 +265,7 @@ the official interfaces of @ruby@ interpretor. -h3. @main()@ +### @main()@ First, straightforwardly, I'll start with @main()@. @@ -273,7 +274,7 @@ It is nice that this is very short.

▼ @main()@

-
+```TODO-lang
   36  int
   37  main(argc, argv, envp)
   38      int argc;
@@ -293,7 +294,7 @@ It is nice that this is very short.
   52  }
 
 (main.c)
-
+``` @#if def NT@ is obviously the NT of Windows NT. But somehow NT is also @@ -326,7 +327,7 @@ Now, I'll start to briefly explain about the built-in Ruby interfaces. -h3. @ruby_init()@ +### @ruby_init()@ @ruby_init()@ initializes the Ruby interpretor. @@ -355,7 +356,7 @@ The code of @ruby_init()@ is omitted because it's unnecessary to read. -h3. @ruby_options()@ +### @ruby_options()@ What to parse command-line options for the Ruby interpreter is @ruby_options()@. @@ -380,7 +381,7 @@ things one by one and not interesting. -h3. @ruby_run()@ +### @ruby_run()@ Finally, @ruby_run()@ starts to evaluate the syntax tree which was set to @ruby_eval_tree@. @@ -390,7 +391,7 @@ for instance, we can evaluate a string by using a function named @rb_eval_string

▼ @ruby_run()@

-
+```TODO-lang
 1257  void
 1258  ruby_run()
 1259  {
@@ -414,7 +415,7 @@ for instance, we can evaluate a string by using a function named @rb_eval_string
 1277  }
 
 (eval.c)
-
+``` We can see the macros @PUSH_xxxx()@, but we can ignore them for now. I'll @@ -424,7 +425,7 @@ is only @eval_node()@. Its content is:

▼ @eval_node()@

-
+```TODO-lang
 1112  static VALUE
 1113  eval_node(self, node)
 1114      VALUE self;
@@ -442,7 +443,7 @@ is only @eval_node()@. Its content is:
 1126  }
 
 (eval.c)
-
+``` This calls @rb_eval()@ on @ruby_eval_tree@. The @ruby_eval_tree_begin@ is storing the statements registered by @BEGIN@. But, this is also not important. @@ -455,10 +456,11 @@ This is also not important, so we won't see this. -h2. @rb_eval()@ +@rb_eval()@ +=========== -h3. Outline +### Outline Now, @rb_eval()@. This function is exactly the real core of @ruby@. @@ -478,7 +480,7 @@ branching by each type of the nodes. First, let's look at the outline.

▼ @rb_eval()@ Outline

-
+```TODO-lang
 2221  static VALUE
 2222  rb_eval(self, n)
 2223      VALUE self;
@@ -517,7 +519,7 @@ branching by each type of the nodes. First, let's look at the outline.
 3422  }
 
 (eval.c)
-
+``` In the omitted part, plenty of the codes to process all nodes are listed. @@ -536,7 +538,7 @@ And finally, the local variables @result@ and @node@ are @volatile@ for GC. -h3. @NODE_IF@ +### @NODE_IF@ Now, taking the @if@ statement as an example, let's look at the process of @@ -552,18 +554,18 @@ these three will be listed at the beginning.

▼source program

-
+```TODO-lang
 if true
   'true expr'
 else
   'false expr'
 end
-
+```

▼ its corresponding syntax tree ( @nodedump@ )

-
+```TODO-lang
 NODE_NEWLINE
 nd_file = "if"
 nd_nth  = 1
@@ -585,7 +587,7 @@ nd_next:
         nd_next:
             NODE_STR
             nd_lit = "false expr":String
-
+``` As we've seen in Part 2, @elsif@ and @unless@ can be, by contriving the ways to assemble, @@ -594,7 +596,7 @@ bundled to a single @NODE_IF@ type, so we don't have to treat them specially.

▼ @rb_eval()@ − @NODE_IF@

-
+```TODO-lang
 2324  case NODE_IF:
 2325    if (trace_func) {
 2326        call_trace_func("line", node, self,
@@ -610,7 +612,7 @@ bundled to a single @NODE_IF@ type, so we don't have to treat them specially.
 2336    goto again;
 
 (eval.c)
-
+``` Only the last @if@ statement is important. @@ -618,14 +620,14 @@ If rewriting it without any change in its meaning, it becomes this: -
+```TODO-lang
 if (RTEST(rb_eval(self, node->nd_cond))) {     (A)
     RETURN(rb_eval(self, node->nd_body));      (B)
 }
 else {
     RETURN(rb_eval(self, node->nd_else));      (C)
 }
-
+``` First, at (A), evaluating (the node of) the Ruby's condition statement and @@ -649,7 +651,7 @@ in the previous chapter "Syntax tree construction". -h3. @NODE_NEW_LINE@ +### @NODE_NEW_LINE@ Since there was @NODE_NEWLINE@ at the node for a @if@ statement, @@ -658,7 +660,7 @@ let's look at the code for it.

▼ @rb_eval()@ - @NODE_NEWLINE@

-
+```TODO-lang
 3404  case NODE_NEWLINE:
 3405    ruby_sourcefile = node->nd_file;
 3406    ruby_sourceline = node->nd_nth;
@@ -671,7 +673,7 @@ let's look at the code for it.
 3413    goto again;
 
 (eval.c)
-
+``` There's nothing particularly difficult. @@ -698,7 +700,7 @@ challenge after finishing the Chapter 16: Blocks. -h3. Pseudo-local Variables +### Pseudo-local Variables @NODE_IF@ and such are interior nodes in a syntax tree. @@ -707,7 +709,7 @@ Let's look at the leaves, too.

▼ @rb_eval()@ Pseudo-Local Variable Nodes

-
+```TODO-lang
 2312  case NODE_SELF:
 2313    RETURN(self);
 2314
@@ -721,7 +723,7 @@ Let's look at the leaves, too.
 2322    RETURN(Qfalse);
 
 (eval.c)
-
+``` We've seen @self@ as the argument of @rb_eval()@. I'd like you to make sure it @@ -731,7 +733,7 @@ The others are probably not needed to be explained. -h3. Jump Tag +### Jump Tag Next, I'd like to explain @NODE_WHILE@ which is corresponding to @while@, @@ -751,7 +753,7 @@ The entry point is @parent()@.

▼ @setjmp()@ and @longjmp()@

-
+```TODO-lang
 jmp_buf buf;
 
 void child2(void) {
@@ -775,7 +777,7 @@ void parent(void) {
         printf("%d\n", result);   /* shows 34 */
     }
 }
-
+``` First, when @setjmp()@ is called at @parent()@, @@ -828,7 +830,7 @@ Let's look at it.

▼ @struct tag@

-
+```TODO-lang
  783  struct tag {
  784      jmp_buf buf;
  785      struct FRAME *frame;   /* FRAME when PUSH_TAG */
@@ -841,7 +843,7 @@ Let's look at it.
  792  };
 
 (eval.c)
-
+``` Because there's the member @prev@, we can infer that @struct tag@ is probably @@ -851,7 +853,7 @@ find the macros @PUSH_TAG()@ and @POP_TAG@, thus it definitely seems a stack.

▼ @PUSH_TAG() POP_TAG()@

-
+```TODO-lang
  793  static struct tag *prot_tag;   /* the pointer to the head of the machine stack */
 
  795  #define PUSH_TAG(ptag) do {             \
@@ -872,7 +874,7 @@ find the macros @PUSH_TAG()@ and @POP_TAG@, thus it definitely seems a stack.
  822  } while (0)
 
 (eval.c)
-
+``` I'd like you to be flabbergasted here because the actual tag is fully allocated @@ -883,7 +885,7 @@ Here is the macros @PUSH@ / @POP@ coupled and extracted to make it easy to read. -
+```TODO-lang
 do {
     struct tag _tag;
     _tag.prev = prot_tag;   /* save the previous tag */
@@ -891,7 +893,7 @@ do {
     /* do several things */
     prot_tag = _tag.prev;   /* restore the previous tag */
 } while (0);
-
+``` This method does not have any overhead of function calls, @@ -918,7 +920,7 @@ Additionally, let's also take a look at @EXEC_TAG()@ and @JUMP_TAG()@.

▼ @EXEC_TAG() JUMP_TAG()@

-
+```TODO-lang
  810  #define EXEC_TAG()    setjmp(prot_tag->buf)
 
  812  #define JUMP_TAG(st) do {               \
@@ -928,7 +930,7 @@ Additionally, let's also take a look at @EXEC_TAG()@ and @JUMP_TAG()@.
  816  } while (0)
 
 (eval.c)
-
+``` In this way, @setjmp@ and @longjmp@ are wrapped by @EXEC_TAG()@ and @JUMP_TAG()@ respectively. @@ -967,7 +969,7 @@ of @longjmp()@, thus we can use this. The types are expressed by the following f

▼tag type

-
+```TODO-lang
  828  #define TAG_RETURN      0x1    /* return */
  829  #define TAG_BREAK       0x2    /* break */
  830  #define TAG_NEXT        0x3    /* next */
@@ -979,7 +981,7 @@ of @longjmp()@, thus we can use this. The types are expressed by the following f
  836  #define TAG_MASK        0xf
 
 (eval.c)
-
+``` The meanings are written as each comment. The last @TAG_MASK@ is the bitmask to @@ -990,7 +992,7 @@ return value of @setjmp()@ can also include information which is not about a -h3. @NODE_WHILE@ +### @NODE_WHILE@ Now, by examining the code of @NODE_WHILE@, let's check the actual usage of tags. @@ -998,16 +1000,16 @@ Now, by examining the code of @NODE_WHILE@, let's check the actual usage of tags

▼ The Source Program

-
+```TODO-lang
 while true
   'true_expr'
 end
-
+```

▼ Its corresponding syntax tree( @nodedump-short@ )

-
+```TODO-lang
 NODE_WHILE
 nd_state = 1 (while)
 nd_cond:
@@ -1015,12 +1017,12 @@ nd_cond:
 nd_body:
     NODE_STR
     nd_lit = "true_expr":String
-
+```

▼ @rb_eval@ - @NODE_WHILE@

-
+```TODO-lang
 2418  case NODE_WHILE:
 2419    PUSH_TAG(PROT_NONE);
 2420    result = Qnil;
@@ -1054,14 +1056,14 @@ nd_body:
 2448    RETURN(result);
 
 (eval.c)
-
+``` The idiom which will appear over and over again appeared in the above code. -
+```TODO-lang
 PUSH_TAG(PROT_NONE);
 switch (state = EXEC_TAG()) {
   case 0:
@@ -1080,7 +1082,7 @@ switch (state = EXEC_TAG()) {
 }
 POP_TAG();
 if (state) JUMP_TAG(state);   /* .. jump again here */
-
+``` First, as @PUSH_TAG()@ and @POP_TAG()@ are the previously described mechanism, @@ -1101,14 +1103,14 @@ The below code is the handler of the node of @redo@.

▼ @rb_eval()@ - @NODE_REDO@

-
+```TODO-lang
 2560  case NODE_REDO:
 2561    CHECK_INTS;
 2562    JUMP_TAG(TAG_REDO);
 2563    break;
 
 (eval.c)
-
+``` As a result of jumping via @JUMP_TAG()@, it goes back to the last @EXEC_TAG()@. @@ -1123,14 +1125,14 @@ Additionally, I moved some labels to enhance readability. -
+```TODO-lang
   if (node->nd_state && !RTEST(rb_eval(self, node->nd_cond)))
       goto while_out;
   do {
       rb_eval(self, node->nd_body);
   } while (RTEST(rb_eval(self, node->nd_cond)));
 while_out:
-
+``` There are the two places calling @rb_eval()@ on @node->nd_state@ which @@ -1148,10 +1150,10 @@ So, I've actually tried it. -
+```TODO-lang
 % ruby -e 'while next do nil end'
 -e:1: void value expression
-
+``` It's simply rejected at the time of parsing. @@ -1161,7 +1163,7 @@ What produces this error is @value_expr()@ of @parse.y@. -h3. The value of an evaluation of @while@ +### The value of an evaluation of @while@ @while@ had not had its value for a long time, but it has been able to return @@ -1172,7 +1174,7 @@ return value of @rb_eval()@, I'd like you to look at the following code: -
+```TODO-lang
         result = Qnil;
         switch (state = EXEC_TAG()) {
           case 0:
@@ -1188,7 +1190,7 @@ return value of @rb_eval()@, I'd like you to look at the following code:
             break;
         }
         RETURN(result);
-
+``` What we should focus on is only (A). The return value of the jump seems to be @@ -1198,7 +1200,7 @@ Here is the passing side:

▼ @rb_eval()@ - @NODE_BREAK@

-
+```TODO-lang
 2219  #define return_value(v) prot_tag->retval = (v)
 
 2539  case NODE_BREAK:
@@ -1212,7 +1214,7 @@ Here is the passing side:
 2547    break;
 
 (eval.c)
-
+``` In this way, by using the macro @return_value()@, it assigns the value to the @@ -1225,14 +1227,14 @@ For example, @rescue@ of an exception handling can exist between them. -
+```TODO-lang
 while cond       # EXEC_TAG() for NODE_WHILE
   begin          # EXEC_TAG() again for rescue
     break 1
   rescue
   end
 end
-
+``` Therefore, it's hard to determine whether or not the @struct tag@ of when doing @@ -1243,7 +1245,7 @@ the return value can be passed to the next tag without particular thought.

▼ @POP_TAG()@

-
+```TODO-lang
  818  #define POP_TAG()                       \
  819      if (_tag.prev)                      \
  820          _tag.prev->retval = _tag.retval;\
@@ -1251,7 +1253,7 @@ the return value can be passed to the next tag without particular thought.
  822  } while (0)
 
 (eval.c)
-
+``` @@ -1268,14 +1270,15 @@ Fig.6: Transferring the return value -h2. Exception +Exception +========= As the second example of the usage of "tag jump", we'll look at how exceptions are dealt with. -h3. @raise@ +### @raise@ When I explained @while@, we looked at the @setjmp()@ side first. This time, @@ -1285,7 +1288,7 @@ which is the substance of @raise@.

▼ @rb_exc_raise()@

-
+```TODO-lang
 3645  void
 3646  rb_exc_raise(mesg)
 3647      VALUE mesg;
@@ -1294,7 +1297,7 @@ which is the substance of @raise@.
 3650  }
 
 (eval.c)
-
+``` @mesg@ is an exception object (an instance of @Exception@ or one of its subclasses). @@ -1304,7 +1307,7 @@ And the below code is very simplified @rb_longjmp()@.

▼ @rb_longjmp()@ (simplified)

-
+```TODO-lang
 static void
 rb_longjmp(tag, mesg)
     int tag;
@@ -1316,7 +1319,7 @@ rb_longjmp(tag, mesg)
     ruby_errinfo = mesg;
     JUMP_TAG(tag);
 }
-
+``` Well, though this can be considered as a matter of course, this is just to jump @@ -1331,12 +1334,12 @@ naturally its substance @ruby_errinfo@ should have the same meaning as well. -h3. The Big Picture +### The Big Picture

▼the source program

-
+```TODO-lang
 begin
   raise('exception raised')
 rescue
@@ -1344,12 +1347,12 @@ rescue
 ensure
   'ensure clause'
 end
-
+```

▼the syntax tree( @nodedump-short@ )

-
+```TODO-lang
 NODE_BEGIN
 nd_body:
     NODE_ENSURE
@@ -1375,7 +1378,7 @@ nd_body:
     nd_ensr:
         NODE_STR
         nd_lit = "ensure clause":String
-
+``` As the right order of @rescue@ and @ensure@ is decided at parser level, @@ -1395,7 +1398,7 @@ accurate to say. -h3. @ensure@ +### @ensure@ We are going to look at the handler of @NODE_ENSURE@ which is the node of @ensure@. @@ -1403,7 +1406,7 @@ We are going to look at the handler of @NODE_ENSURE@ which is the node of @ensur

▼ @rb_eval()@ - @NODE_ENSURE@

-
+```TODO-lang
 2634  case NODE_ENSURE:
 2635    PUSH_TAG(PROT_NONE);
 2636    if ((state = EXEC_TAG()) == 0) {
@@ -1422,7 +1425,7 @@ We are going to look at the handler of @NODE_ENSURE@ which is the node of @ensur
 2649    break;
 
 (eval.c)
-
+``` This branch using @if@ is another idiom to deal with tag. @@ -1435,13 +1438,13 @@ To check the specification first, -
+```TODO-lang
 begin
   expr0
 ensure
   expr1
 end
-
+``` for the above statement, the value of the whole @begin@ will be the value of @@ -1462,7 +1465,7 @@ When any jump has not occurred, @state==0@ in this case, -h3. @rescue@ +### @rescue@ It's been a little while, I'll show the syntax tree of @rescue@ again just in case. @@ -1470,18 +1473,18 @@ It's been a little while, I'll show the syntax tree of @rescue@ again just in ca

▼Source Program

-
+```TODO-lang
 begin
   raise()
 rescue ArgumentError, TypeError
   'error raised'
 end
-
+```

▼ Its Syntax Tree ( @nodedump-short@ )

-
+```TODO-lang
 NODE_BEGIN
 nd_body:
     NODE_RESCUE
@@ -1505,7 +1508,7 @@ nd_body:
             nd_lit = "error raised":String
         nd_head = (null)
     nd_else = (null)
-
+``` I'd like you to make sure that (the syntax tree of) the statement to be @@ -1514,7 +1517,7 @@ I'd like you to make sure that (the syntax tree of) the statement to be

▼ @rb_eval()@ - @NODE_RESCUE@

-
+```TODO-lang
 2590  case NODE_RESCUE:
 2591  retry_entry:
 2592    {
@@ -1560,7 +1563,7 @@ I'd like you to make sure that (the syntax tree of) the statement to be
 2632    break;
 
 (eval.c)
-
+``` Even though the size is not small, it's not difficult because it only simply @@ -1572,9 +1575,9 @@ I'll explain only its effects here. Its prototype is this, -
+```TODO-lang
 static int handle_rescue(VALUE self, NODE *resq)
-
+``` and it determines whether the currently occurring exception (@ruby_errinfo@) is diff --git a/fin.textile b/fin.md similarity index 96% rename from fin.textile rename to fin.md index af65b38..860272e 100644 --- a/fin.textile +++ b/fin.md @@ -2,9 +2,11 @@ layout: default --- -h1. Final Chapter: Ruby's future +Final Chapter: Ruby's future +---------------------------- -h2. Issues to be addressed +Issues to be addressed +====================== `ruby` isn't 'completely finished' software. It's still being developed, there are still a lot of issues. Firstly, we want to try removing @@ -14,7 +16,7 @@ The order of the topics is mostly in the same order as the chapters of this book. -h3. Performance of GC +### Performance of GC The performance of the current GC might be "not notably bad, but not notably good". @@ -41,7 +43,7 @@ However, if such application will actually be created in the future, there might be the necessity to consider Incremental GC. -h3. Implementation of parser +### Implementation of parser As we saw in Part 2, the implementation of `ruby` parser has already utilized `yacc`'s ability to almost its limit, thus I can't think it can endure further @@ -51,7 +53,7 @@ and it's sad if we could not express another demanded grammar because of the limitation of `yacc`. -h3. Reuse of parser +### Reuse of parser Ruby's parser is very complex. In particular, dealing with around `lex_state` seriously is very hard. Due to this, embedding a Ruby program or creating a @@ -84,14 +86,14 @@ while parsing. -h3. Hiding Code +### Hiding Code With current `ruby`, it does not work without the source code of the program to run. Thus, people who don't want others to read their source code might have trouble. -h3. Interpretor Object +### Interpretor Object Currently each process cannot have multiple `ruby` interpretors, this was discussed in Chapter 13. 
@@ -99,7 +101,7 @@ If having multiple interpretors is practically possible, it seems better, but is it possible to implement such thing? -h3. The structure of evaluator +### The structure of evaluator Current `eval.c` is, above all, too complex. Embedding Ruby's stack frames to machine stack could occasionally become the @@ -110,7 +112,7 @@ aggressively can easily cause slowing down because `setjmp()` set aside all things in registers. -h3. The performance of evaluator +### The performance of evaluator `ruby` is already enough fast for ordinary use. But aside from it, regarding a language processor, @@ -120,7 +122,7 @@ what can we do? In such case, the first thing we have to do is profiling. So I profiled. -
+```TODO-lang
   %   cumulative   self              self     total
  time   seconds   seconds    calls  ms/call  ms/call  name
  20.25      1.64     1.64  2638359     0.00     0.00  rb_eval
@@ -132,7 +134,7 @@ So I profiled.
   5.19      5.27     0.42   388066     0.00     0.00  st_foreach
   3.46      5.55     0.28  8605866     0.00     0.00  rb_gc_mark
   2.22      5.73     0.18  3819588     0.00     0.00  call_cfunc
-
+``` This is a profile when running some application but this is approximately the profile of a general Ruby program. @@ -152,7 +154,7 @@ In other words, without changing the way of thinking fundamentally, there's no room to improve. -h3. The implementation of thread +### The implementation of thread This was also discussed in Chapter 19. There are really a lot of issues about the implementation of the current ruby's thread. Particularly, it cannot mix @@ -164,13 +166,14 @@ cannot continue to use eternally, isn't it? -h2. `ruby` 2 +`ruby` 2 +======== Subsequently, on the other hand, I'll introduce the trend of the original `ruby`, how it is trying to counter these issues. -h3. Rite +### Rite At the present time, ruby's edge is 1.6.7 as the stable version and 1.7.3 as the development version, but perhaps the next stable version 1.8 will come out in @@ -195,7 +198,7 @@ entirely just a "plan". If you expect so much, it's possible it will turn out disappointments. Therefore, for now, let's just expect slightly. -h3. The language to write +### The language to write Firstly, the language to use. Definitely it will be C. Mr. Matsumoto said to `ruby-talk`, which is the English mailing list for Ruby, @@ -210,7 +213,7 @@ so not to increase extra efforts around this is necessary. However, chances are good that it will be ANSI C next time. -h3. GC +### GC Regarding the implementation of GC, the good start point would be @@ -223,7 +226,7 @@ perpetually, but anyway it will proceed for the direction to which we can expect somewhat improvement on speed. -h3. Parser +### Parser Regarding the specification, it's very likely that the nested method calls without parentheses will be forbidden. As we've seen, `command_call` has a great @@ -237,7 +240,7 @@ possible to implement such complex thing by hand? Such anxiety might left. Whichever way we choose, the path must be thorny. -h3. Evaluator +### Evaluator The evaluator will be completely recreated. 
Its aims are mainly to improve speed and to simplify the implementation. @@ -285,7 +288,7 @@ For another example, Python is a bytecode interpretor. -h3. Thread +### Thread Regarding thread, the thing is native thread support. The environment around thread has been significantly improved, @@ -308,7 +311,7 @@ and it is rarely actually used. Therefore there might be no problem. -h3. M17N +### M17N In addition, I'd like to mention a few things about class libraries. This is about multi-lingualization (M17N for short). @@ -325,7 +328,7 @@ it will be absorbed at some point in the middle of 1.9. -h3. IO +### IO The `IO` class in current Ruby is a simple wrapper of `stdio`, @@ -342,7 +345,8 @@ Therefore, it seems Rite will have its own `stdio`. -h2. Ruby Hacking Guide +Ruby Hacking Guide +================== So far, we've always acted as observers who look at `ruby` from outside. @@ -353,7 +357,7 @@ I'll introduce the suggestions and activities for `ruby` from community, as a farewell gift for Ruby Hackers both at present and in the future. -h3. Generational GC +### Generational GC First, as also mentioned in Chapter 5, the generational GC made by Mr. Kiyama Masato. @@ -368,7 +372,7 @@ more than anything else, it was the first large non-official patch. -h3. Oniguruma +### Oniguruma The regular expression engine used by current Ruby is a remodeled version of GNU regex. That GNU regex was in the first place written for Emacs. And then it was @@ -386,13 +390,13 @@ absorbed as soon as possible. You can obtain Oniguruma from the `ruby`'s CVS repository in the following way. -
+```TODO-lang
 % cvs -d :pserver:anonymous@cvs.ruby-lang.org:/src co oniguruma
-
+``` -h3. ripper +### ripper Next, ripper is my product. It is an extension library made by remodeling `parse.y`. It is not a change applied to the `ruby`'s main body, but I @@ -411,14 +415,14 @@ if this is accounted, I think it is constructed well. It took only three days or so to implement, really just a piece of cake. -h3. A parser alternative +### A parser alternative This product has not yet appeared in a clear form, there's a person who write a Ruby parser in C++ which can be used totally independent of `ruby`. (`[ruby-talk:50497]`). -h3. JRuby +### JRuby More aggressively, there's an attempt to rewrite entire the interpretor. For example, a Ruby written in Java, @@ -452,7 +456,7 @@ However, the overall impression I got was, it's way better than I imagined. -h3. NETRuby +### NETRuby If it can run with Java, it should also with C#. Therefore, a Ruby written in C# appeared, @@ -471,7 +475,7 @@ such things are the problems. But `instance_eval` is in effect (astounding!). -h3. How to join `ruby` development +### How to join `ruby` development `ruby`'s developer is really Mr. Matsumoto as an individual, regarding the final decision about the direction `ruby` will take, @@ -523,7 +527,7 @@ I'll answer it as much as possible, and other people would respond to it, too. -h3. Finale +### Finale The long journey of this book is going to end now. As there was the limitation of the number of pages, diff --git a/gc.textile b/gc.md similarity index 94% rename from gc.textile rename to gc.md index d77a918..ebe9b92 100644 --- a/gc.textile +++ b/gc.md @@ -4,9 +4,11 @@ title: Garbage Collection --- Translated by Sebastian Krause & ocha- -h1. Chapter 5: Garbage Collection +Chapter 5: Garbage Collection +----------------------------- -h2. A conception of an executing program +A conception of an executing program +==================================== It's all of a sudden but at the beginning of this chapter, we'll learn about the memory space of an executing program. 
In this chapter @@ -15,14 +17,14 @@ preliminary knowledge it'll be hard to follow. And it'll be also necessary for the following chapters. Once we finish this here, the rest will be easier. -h3. Memory Segments +### Memory Segments A general C program has the following parts in the memory space: -# the text area -# a place for static and global variables -# the machine stack -# the heap +* the text area +* a place for static and global variables +* the machine stack +* the heap The text area is where the code lies. Obviously the second area holds static and global variables. Arguments and local variables of functions are piling up in the machine stack. @@ -41,14 +43,17 @@ is a function call, one stack frame is pushed. When doing `return`, one stack frame will be popped. Figure 1 shows the really simplified appearance of the machine stack. -!images/ch_gc_macstack.jpg(Machine Stack)! +
+ figure 1: Machine Stack +
figure 1: Machine Stack
+
In this picture, "above" is written above the top of the stack, but this it is not necessarily always the case that the machine stack goes from low addresses to high addresses. For instance, on the x86 machine the stack goes from high to low addresses. -h3. `alloca()` +### `alloca()` By using `malloc()`, we can get an arbitrarily large memory area of the heap. `alloca()` is the machine stack version of it. @@ -81,18 +86,22 @@ if there are the memories allocated for the functions already finished, free them by using `free()`. -!images/ch_gc_calloca.jpg(The behavior of an `alloca()` implemented in C)! +
+ figure 2: The behavior of an `alloca()` implemented in C +
figure 2: The behavior of an `alloca()` implemented in C
+
The @missing/alloca.c@ of @ruby@ is an example of an emulated @alloca()@ . -h2. Overview +Overview +======== From here on we can at last talk about the main subject of this chapter: garbage collection. -h3. What is GC? +### What is GC? Objects are normally on top of the memory. Naturally, if a lot of objects are created, a lot of memory is used. If memory were infinite there would be no problem, but in reality there is always a memory @@ -135,7 +144,7 @@ Let's follow the details of `ruby`'s GC in this chapter. The target file is `gc.c`. -h3. What does GC do? +### What does GC do? Before explaining the GC algorithm, I should explain "what garbage collection is". @@ -145,7 +154,10 @@ To make descriptions more concrete, let's simplify the structure by assuming that there are only objects and links. This would look as shown in Figure 3. -!images/ch_gc_objects.jpg(Objects)! +
+ figure 3: Objects +
figure 3: Objects
+
The objects pointed to by global variables and the objects on the stack of a @@ -163,7 +175,10 @@ These objects colored black are the necessary objects. The rest of the objects can be released. -!images/ch_gc_gcimage.jpg(necessary objects and unnecessary objects)! +
+ figure 4: necessary objects and unnecessary objects +
figure 4: necessary objects and unnecessary objects
+
In technical terms, "the surely necessary objects" are called "the roots of GC". @@ -171,7 +186,7 @@ That's because they are the roots of tree structures that emerges as a consequence of tracing necessary objects. -h3. Mark and Sweep +### Mark and Sweep GC was first implemented in Lisp. The GC implemented in Lisp at first, @@ -205,14 +220,17 @@ But this point can be alleviated by modifying the algorithm (it is called increm -h3. Stop and Copy +### Stop and Copy Stop and Copy is a variation of Mark and Sweep. First, prepare several object areas. To simplify this description, assume there are two areas @A@ and @B@ here. And put an "active" mark on the one of the areas. When creating an object, create it only in the "active" one. (Figure 5) -!images/ch_gc_stop2.jpg(Stop and Copy (1))! +
+ figure 5: Stop and Copy (1) +
figure 5: Stop and Copy (1)
+
When the GC starts, follow links from the roots in the same manner as @@ -220,7 +238,10 @@ mark-and-sweep. However, move objects to another area instead of marking them (Figure 6). When all the links have been followed, discard the all elements which remain in @A@, and make @B@ active next. -!images/ch_gc_stop3.jpg(Stop and Copy (2))! +
+ figure 6: Stop and Copy (2) +
figure 6: Stop and Copy (2)
+
Stop and Copy also has two advantages: @@ -237,7 +258,7 @@ And also two disadvantages: It seems what exist in this world are not only positive things. -h3. Reference counting +### Reference counting Reference counting differs a bit from the aforementioned GCs, the reach-check code is distributed in several places. @@ -249,7 +270,10 @@ increased. When quitting to refer, decrease the counter. When the counter of an object becomes zero, release the object. This is the method called reference counting (Figure 7). -!images/ch_gc_refcnt.jpg(Reference counting)! +
+ figure 7: Reference counting +
figure 7: Reference counting
+
This method also has two advantages: @@ -267,7 +291,10 @@ a cycle of references as shown in Figure 8. If this is the case the counters will never decrease and the objects will never be released. -!images/ch_gc_cycle.jpg(Cycle)! +
+ figure 8: Cycle +
figure 8: Cycle
+
By the way, latest Python(2.2) uses reference counting GC but it can free cycles. @@ -278,7 +305,8 @@ but because it sometimes invokes mark and sweep GC to check. -h2. Object Management +Object Management +================= Ruby's garbage collection is only concerned with ruby objects. Moreover, it only concerned with the objects created and managed by `ruby`. @@ -289,29 +317,29 @@ For instance, the following function will cause a memory leak even if `ruby` is running. -
+```c
 void not_ok()
 {
     malloc(1024);  /* receive memory and discard it */
 }
-
+``` However, the following function does not cause a memory leak. -
+```c
 void this_is_ok()
 {
     rb_ary_new();  /* create a ruby array and discard it */
 }
-
+``` Since @rb_ary_new()@ uses Ruby's proper interface to allocate memory, the created object is under the management of the GC of `ruby`, thus `ruby` will take care of it. -h3. `struct RVALUE` +### `struct RVALUE` Since the substance of an object is a struct, managing objects means managing that structs. @@ -328,7 +356,7 @@ The declaration of that union is as follows. ▼ `RVALUE` -
+```TODO-lang
  211  typedef struct RVALUE {
  212      union {
  213          struct {
@@ -355,7 +383,7 @@ The declaration of that union is as follows.
  234  } RVALUE;
 
 (gc.c)
-
+``` `struct RVALUE` is a struct that has only one element. I've heard that the reason why `union` is not directly used is to enable to @@ -377,14 +405,14 @@ Hence, we can confirm that setting their flags to `0` is necessity and sufficiency to represent "dead" objects. -h3. Object heap +### Object heap The memory for all the object structs has been brought together in global variable `heaps`. Hereafter, let's call this an object heap. ▼ Object heap -
+```TODO-lang
  239  #define HEAPS_INCREMENT 10
  240  static RVALUE **heaps;
  241  static int heaps_length = 0;
@@ -395,20 +423,26 @@ Hereafter, let's call this an object heap.
  246  static int heap_slots = HEAP_MIN_SLOTS;
 
 (gc.c)
-
+``` @heaps@ is an array of arrays of @struct RVALUE@. Since it is `heapS`, the each contained array is probably each @heap@. Each element of @heap@ is each @slot@ (Figure 9). -!images/ch_gc_heapitems.jpg(`heaps`, `heap`, `slot`)! +
+ figure 9: `heaps`, `heap`, `slot` +
figure 9: `heaps`, `heap`, `slot`
+
The length of @heaps@ is @heap_length@ and it can be changed. The number of the slots actually in use is @heaps_used@. The length of each heap is in the corresponding @heaps_limits[index]@. Figure 10 shows the structure of the object heap. -!images/ch_gc_heaps.jpg(conceptual diagram of `heaps` in memory)! +
+ figure 10: conceptual diagram of `heaps` in memory +
figure 10: conceptual diagram of `heaps` in memory
+
This structure has a necessity to be this way. For instance, if all structs are stored in an array, @@ -442,7 +476,7 @@ and whose position and total amount are not restricted at the same time. -h3. `freelist` +### `freelist` Unused `RVALUE`s are managed by being linked as a single line which is a linked @@ -452,16 +486,16 @@ The `as.free.next` of `RVALUE` is the link used for this purpose. ▼ `freelist` -
+```TODO-lang
  236  static RVALUE *freelist = 0;
 
 (gc.c)
-
+``` -h3. `add_heap()` +### `add_heap()` As we understood the data structure, @@ -472,7 +506,7 @@ I'll show the one simplified by omitting error handlings and castings. ▼ `add_heap()` (simplified) -
+```TODO-lang
 static void
 add_heap()
 {
@@ -502,7 +536,7 @@ add_heap()
         p++;
     }
 }
-
+``` Please check the following points. @@ -520,7 +554,7 @@ These values are used later when determining the integers "which seems `VALUE`". -h3. `rb_newobj()` +### `rb_newobj()` Considering all of the above points, we can tell the way to create an object @@ -532,7 +566,7 @@ Let's confirm this by reading the `rb_newobj()` function to create an object. ▼ `rb_newobj()` -
+```TODO-lang
  297  VALUE
  298  rb_newobj()
  299  {
@@ -547,7 +581,7 @@ Let's confirm this by reading the `rb_newobj()` function to create an object.
  308  }
 
 (gc.c)
-
+``` If `freelest` is 0, in other words, if there's not any unused structs, @@ -557,7 +591,8 @@ there's no problem because in this case a new space is allocated in `rb_gc()`. And take a struct from `freelist`, zerofill it by `MEMZERO()`, and return it. -h2. Mark +Mark +==== As described, `ruby`'s GC is Mark & Sweep. @@ -569,7 +604,7 @@ and free objects that `FL_MARK` has not been set. -h3. `rb_gc_mark()` +### `rb_gc_mark()` `rb_gc_mark()` is the function to mark objects recursively. @@ -577,7 +612,7 @@ h3. `rb_gc_mark()` ▼ `rb_gc_mark()` -
+```TODO-lang
  573  void
  574  rb_gc_mark(ptr)
  575      VALUE ptr;
@@ -609,18 +644,18 @@ h3. `rb_gc_mark()`
  601  }
 
 (gc.c)
-
+``` The definition of @RANY()@ is as follows. It is not particularly important. ▼ `RANY()` -
+```TODO-lang
  295  #define RANY(o) ((RVALUE*)(o))
 
 (gc.c)
-
+``` There are the checks for non-pointers or already freed objects and the recursive @@ -628,9 +663,9 @@ checks for marked objects at the beginning, -
+```TODO-lang
 obj->as.basic.flags |= FL_MARK;
-
+``` and `obj` (this is the `ptr` parameter of this function) is marked. Then next, it's the turn to follow the references from `obj` and mark. @@ -650,7 +685,7 @@ This code is omitted because it is not part of the main line. -h3. `rb_gc_mark_children()` +### `rb_gc_mark_children()` Now, as for `rb_gc_mark_children()`, @@ -661,7 +696,7 @@ Here, it is shown but the simple enumerations are omitted: ▼ `rb_gc_mark_children()` -
+```TODO-lang
  603  void
  604  rb_gc_mark_children(ptr)
  605      VALUE ptr;
@@ -724,7 +759,7 @@ Here, it is shown but the simple enumerations are omitted:
  842  }
 
 (gc.c)
-
+``` It calls `rb_gc_mark()` recursively, is only what I'd like you to confirm. @@ -739,13 +774,13 @@ This code is extracted from the second `switch` statement. ▼ `rb_gc_mark_children()` - `T_DATA` -
+```TODO-lang
  789        case T_DATA:
  790          if (obj->as.data.dmark) (*obj->as.data.dmark)(DATA_PTR(obj));
  791          break;
 
 (gc.c)
-
+``` Here, it does not use `rb_gc_mark()` or similar functions, @@ -758,7 +793,7 @@ contain `VALUE`, there's no need to mark. -h3. `rb_gc()` +### `rb_gc()` By now, we've finished to talk about each object. @@ -769,7 +804,7 @@ In other words, "the roots of GC". ▼ `rb_gc()` -
+```TODO-lang
 1110  void
 1111  rb_gc()
 1112  {
@@ -791,7 +826,7 @@ In other words, "the roots of GC".
 1184  }
 
 (gc.c)
-
+``` The roots which should be marked will be shown one by one after this, @@ -802,7 +837,7 @@ It means that the local variables and arguments of C are automatically marked. For example, -
+```TODO-lang
 static int
 f(void)
 {
@@ -810,7 +845,7 @@ f(void)
 
     /* …… do various things …… */
 }
-
+``` like this way, we can protect an object just by putting it into a variable. @@ -825,7 +860,7 @@ How to resolve this is the key when reading the implementation of GC. -h3. The Ruby Stack +### The Ruby Stack First, it marks the (`ruby`'s) stack frames used by the interpretor. @@ -835,7 +870,7 @@ you don't have to think so much about it for now. ▼ Marking the Ruby Stack -
+```TODO-lang
 1130      /* mark frame stack */
 1131      for (frame = ruby_frame; frame; frame = frame->prev) {
 1132          rb_gc_mark_frame(frame);
@@ -852,7 +887,7 @@ you don't have to think so much about it for now.
 1143      rb_gc_mark((VALUE)ruby_dyna_vars);
 
 (gc.c)
-
+``` `ruby_frame ruby_class ruby_scope ruby_dyna_vars` are the variables to point to each top of the stacks of the evaluator. These hold the frame, the class scope, @@ -860,13 +895,13 @@ the local variable scope, and the block local variables at that time respectively. -h3. Register +### Register Next, it marks the CPU registers. ▼ marking the registers -
+```TODO-lang
 1148      FLUSH_REGISTER_WINDOWS;
 1149      /* Here, all registers must be saved into jmp_buf. */
 1150      setjmp(save_regs_gc_mark);
@@ -874,7 +909,7 @@ Next, it marks the CPU registers.
                                sizeof(save_regs_gc_mark) / sizeof(VALUE *));
 
 (gc.c)
-
+``` `FLUSH_REGISTER_WINDOWS` is special. We will see it later. @@ -896,7 +931,7 @@ to explicitly write out the registers.

▼ the original version of `setjmp`

-
+```TODO-lang
 1072  #ifdef __GNUC__
 1073  #if defined(__human68k__) || defined(DJGPP)
 1074  #if defined(__human68k__)
@@ -936,7 +971,7 @@ to explicitly write out the registers.
 1108  #endif /* __GNUC__ */
 
 (gc.c)
-
+``` Alignment is the constraint when putting variables on memories. @@ -965,12 +1000,12 @@ it will be marked in the next code:

▼ mark the registers (shown again)

-
+```TODO-lang
 1151      mark_locations_array((VALUE*)save_regs_gc_mark,
                                sizeof(save_regs_gc_mark) / sizeof(VALUE *));
 
 (gc.c)
-
+``` This is the first time that `mark_locations_array()` appears. @@ -979,12 +1014,12 @@ I'll describe it in the next section. -h4. `mark_locations_array()` +#### `mark_locations_array()`

▼ `mark_locations_array()`

-
+```TODO-lang
  500  static void
  501  mark_locations_array(x, n)
  502      register VALUE *x;
@@ -999,7 +1034,7 @@ h4. `mark_locations_array()`
  511  }
 
 (gc.c)
-
+``` This function is to mark the all elements of an array, @@ -1020,12 +1055,12 @@ it is `is_pointer_to_heap()`. -h4. `is_pointer_to_heap()` +#### `is_pointer_to_heap()`

▼ `is_pointer_to_heap()`

-
+```TODO-lang
  480  static inline int
  481  is_pointer_to_heap(ptr)
  482      void *ptr;
@@ -1047,7 +1082,7 @@ h4. `is_pointer_to_heap()`
  498  }
 
 (gc.c)
-
+``` @@ -1070,7 +1105,7 @@ to compromise. -h4. Register Window +#### Register Window This section is about `FLUSH_REGISTER_WINDOWS()` which has been deferred. @@ -1089,7 +1124,7 @@ The content of the macro is like this:

▼ `FLUSH_REGISTER_WINDOWS`

-
+```TODO-lang
  125  #if defined(sparc) || defined(__sparc__)
  126  # if defined(linux) || defined(__linux__)
  127  #define FLUSH_REGISTER_WINDOWS  asm("ta  0x83")
@@ -1101,7 +1136,7 @@ The content of the macro is like this:
  133  #endif
 
 (defines.h)
-
+``` `asm(...)` is a built-in assembler. @@ -1121,7 +1156,7 @@ that is also convenient when debugging. -h3. Machine Stack +### Machine Stack Then, let's go back to the rest of `rb_gc()`. @@ -1130,7 +1165,7 @@ This time, it marks `VALUES`s in the machine stack.

▼ mark the machine stack

-
+```TODO-lang
 1152      rb_gc_mark_locations(rb_gc_stack_start, (VALUE*)STACK_END);
 1153  #if defined(__human68k__)
 1154      rb_gc_mark_locations((VALUE*)((char*)rb_gc_stack_start + 2),
@@ -1138,7 +1173,7 @@ This time, it marks `VALUES`s in the machine stack.
 1156  #endif
 
 (gc.c)
-
+``` `rb_gc_stack_start` seems the start address (the end of the stack) and @@ -1159,7 +1194,7 @@ let's examine these three in this order. -h4. `Init_stack()` +#### `Init_stack()` The first thing is `rb_gc_starck_start`. @@ -1170,7 +1205,7 @@ initializing the `ruby` interpretor.

▼ `Init_stack()`

-
+```TODO-lang
 1193  void
 1194  Init_stack(addr)
 1195      VALUE *addr;
@@ -1199,7 +1234,7 @@ initializing the `ruby` interpretor.
 1218  }
 
 (gc.c)
-
+``` What is important is only the part in the middle. @@ -1219,7 +1254,7 @@ We can ignore this. -h4. `STACK_END` +#### `STACK_END` Next, we'll look at the `STACK_END` which is the macro to detect the end of the stack. @@ -1227,7 +1262,7 @@ Next, we'll look at the `STACK_END` which is the macro to detect the end of the

▼ `STACK_END`

-
+```TODO-lang
  345  #ifdef C_ALLOCA
  346  # define SET_STACK_END VALUE stack_end; alloca(0);
  347  # define STACK_END (&stack_end)
@@ -1241,7 +1276,7 @@ Next, we'll look at the `STACK_END` which is the macro to detect the end of the
  355  #endif
 
 (gc.c)
-
+``` As there are three variations of `SET_STACK_END`, let's start with the bottom one. @@ -1278,7 +1313,7 @@ As for `__builtin_frame_adress(0)`, it provides the address of the current frame -h4. `rb_gc_mark_locations()` +#### `rb_gc_mark_locations()` The last one is the `rb_gc_mark_locations()` function that actually marks the stack. @@ -1286,7 +1321,7 @@ The last one is the `rb_gc_mark_locations()` function that actually marks the st

▼ `rb_gc_mark_locations()`

-
+```TODO-lang
  513  void
  514  rb_gc_mark_locations(start, end)
  515      VALUE *start, *end;
@@ -1304,7 +1339,7 @@ The last one is the `rb_gc_mark_locations()` function that actually marks the st
  527  }
 
 (gc.c)
-
+``` Basically, delegating to the function `mark_locations_array()` which marks a @@ -1318,7 +1353,7 @@ Therefore, so that the smaller one becomes `start`, they are adjusted here. -h3. The other root objects +### The other root objects Finally, it marks the built-in `VALUE` containers of the interpretor. @@ -1326,7 +1361,7 @@ Finally, it marks the built-in `VALUE` containers of the interpretor.

▼ The other roots

-
+```TODO-lang
 1159      /* mark the registered global variables */
 1160      for (list = global_List; list; list = list->next) {
 1161          rb_gc_mark(*list->varptr);
@@ -1344,7 +1379,7 @@ Finally, it marks the built-in `VALUE` containers of the interpretor.
 1172      rb_gc_mark_parser();
 
 (gc.c)
-
+``` When putting a `VALUE` into a global variable of C, @@ -1385,10 +1420,11 @@ Until here, the mark phase has been finished. -h2. Sweep +Sweep +===== -h3. The special treatment for `NODE` +### The special treatment for `NODE` The sweep phase is the procedures to find out and free the not-marked objects. @@ -1398,7 +1434,7 @@ Take a look at the next part:

▼ at the beggining of `gc_sweep()`

-
+```TODO-lang
  846  static void
  847  gc_sweep()
  848  {
@@ -1421,7 +1457,7 @@ Take a look at the next part:
  864      }
 
 (gc.c)
-
+``` `NODE` is a object to express a program in the parser. @@ -1436,7 +1472,7 @@ protected from being collected while compiling (`ruby_in_compile`) . -h3. Finalizer +### Finalizer After it has reached here, all not-marked objects can be freed. @@ -1447,7 +1483,7 @@ This hook is called "finalizer".

▼ `gc_sweep()` Middle

-
+```TODO-lang
  869      freelist = 0;
  870      final_list = deferred_final_list;
  871      deferred_final_list = 0;
@@ -1489,7 +1525,7 @@ This hook is called "finalizer".
  907      during_gc = 0;
 
 (gc.c)
-
+``` This checks all over the object heap from the edge, @@ -1518,7 +1554,7 @@ It means that while executing the finalizers, one cannot use the hooked objects.

▼ `gc_sweep()` the rest

-
+```TODO-lang
  910      if (final_list) {
  911          RVALUE *tmp;
  912
@@ -1538,7 +1574,7 @@ It means that while executing the finalizers, one cannot use the hooked objects.
  926  }
 
 (gc.c)
-
+``` The `for` in the last half is the main finalizing procedure. @@ -1550,7 +1586,7 @@ the previous list. -h3. `rb_gc_force_recycle()` +### `rb_gc_force_recycle()` I'll talk about a little different thing at the end. @@ -1561,7 +1597,7 @@ It's `rb_gc_force_recycle()`.

▼ `rb_gc_force_recycle()`

-
+```TODO-lang
  928  void
  929  rb_gc_force_recycle(p)
  930      VALUE p;
@@ -1572,7 +1608,7 @@ It's `rb_gc_force_recycle()`.
  935  }
 
 (gc.c)
-
+``` Its mechanism is not so special, but I introduced this because you'll see it @@ -1582,10 +1618,11 @@ several times in Part 2 and Part 3. -h2. Discussions +Discussions +=========== -h3. To free spaces +### To free spaces The space allocated by an individual object, say, `char[]` of `String`, is @@ -1618,7 +1655,7 @@ The attached CD-ROM also contains the edge `ruby`, so please check by `diff`. -h3. Generational GC +### Generational GC Mark & Sweep has an weak point, it is "it needs to touch the entire object space @@ -1684,7 +1721,10 @@ However, when there are links from old-generation to new-generation, the new-generation objects will not be marked. (Figure 11) -!images/ch_gc_gengc.jpg(reference over generations)! +
+ figure 11: reference over generations +
figure 11: reference over generations
+
This is not good, so at the moment when an old-generational object refers to a new-generational object, @@ -1701,7 +1741,7 @@ but the precise cause has not figured out. -h3. Compaction +### Compaction Could the `ruby`'s GC do compaction? @@ -1725,7 +1765,10 @@ But as trade-offs, accessing speed slows down and the compatibility of extension libraries is lost. -!images/ch_gc_objid.jpg(reference through the object ID)! +
+ figure 12: reference through the object ID +
figure 12: reference through the object ID
+
Then, the next way is to allow moving the struct only when they are pointed @@ -1736,7 +1779,10 @@ In the ordinary programs, there are not so many objects that object structs is quite high. -!images/ch_gc_mostcopy.jpg(Mostly-copying garbage collection)! +
+ figure 13: Mostly-copying garbage collection +
figure 13: Mostly-copying garbage collection
+
Moreover and moreover, by enabling to move the struct, @@ -1746,7 +1792,7 @@ It seems to be worth to challenge. -h3. `volatile` to protect from GC +### `volatile` to protect from GC I wrote that GC takes care of `VALUE` on the stack, @@ -1757,11 +1803,11 @@ For example, there's a possibility of disappearing in the following case: -
+```c
 VALUE str;
 str = rb_str_new2("...");
 printf("%s\n", RSTRING(str)->ptr);
-
+``` Because this code does not access the `str` itself, @@ -1771,9 +1817,9 @@ There's no choice in this case -
+```c
 volatile VALUE str;
-
+``` we need to write this way. `volatile` is a reserved word of C, @@ -1793,10 +1839,11 @@ but it seems it could not be applied to `ruby` because its algorithm has a hole. -h2. When to invoke +When to invoke +============== -h3. Inside `gc.c` +### Inside `gc.c` When to invoke GC? @@ -1815,7 +1862,7 @@ Doing GC may free memories and it's possible that a space becomes available agai -h3. Inside the interpritor +### Inside the interpritor There's several places except for `gc.c` where calling `rb_gc()` in the interpretor. @@ -1836,7 +1883,8 @@ that `NODE` cannot be garbage collected while compiling. -h2. Object Creation +Object Creation +=============== We've finished about GC and come to be able to deal with the Ruby objects from @@ -1846,18 +1894,18 @@ This is not so related to GC, rather, it is related a little to the discussion about classes in the previous chapter. -h3. Allocation Framework +### Allocation Framework We've created objects many times. For example, in this way: -
+```ruby
 class C
 end
 C.new()
-
+``` At this time, how does `C.new` create a object? @@ -1868,7 +1916,7 @@ First, `C.new` is actually `Class#new`. Its actual body is this:

▼ `rb_class_new_instance()`

-
+```TODO-lang
  725  VALUE
  726  rb_class_new_instance(argc, argv, klass)
  727      int argc;
@@ -1884,7 +1932,7 @@ First, `C.new` is actually `Class#new`. Its actual body is this:
  737  }
 
 (object.c)
-
+``` `rb_obj_alloc()` calls the `allocate` method against the `klass`. @@ -1894,7 +1942,7 @@ It is `Class#allocate` by default and its actual body is `rb_class_allocate_inst

▼ `rb_class_allocate_instance()`

-
+```TODO-lang
  708  static VALUE
  709  rb_class_allocate_instance(klass)
  710      VALUE klass;
@@ -1914,7 +1962,7 @@ It is `Class#allocate` by default and its actual body is `rb_class_allocate_inst
  723  }
 
 (object.c)
-
+``` `rb_newobj()` is a function that returns a `RVALUE` by taking from the `freelist`. @@ -1932,11 +1980,11 @@ This is summarized as follows: -
+```TODO-lang
 SomeClass.new            = Class#new (rb_class_new_instance)
     SomeClass.allocate       = Class#allocate (rb_class_allocate_instance)
     SomeClass#initialize     = Object#initialize (rb_obj_dummy)
-
+``` I could say that the `allocate` class method is to physically initialize, @@ -1948,7 +1996,7 @@ the "allocation framework". -h3. Creating User Defined Objects +### Creating User Defined Objects Next, we'll examine about the instance creations of the classes defined in @@ -1958,7 +2006,7 @@ how to allocate it, `ruby` don't understand how to create its object. Let's look at how to tell it. -h4. `Data_Wrap_Struct()` +#### `Data_Wrap_Struct()` Whichever it is user-defined or not, its creation mechanism itself can follow @@ -1977,10 +2025,10 @@ This is how to use: -
+```TODO-lang
 struct my *ptr = malloc(sizeof(struct my));  /* arbitrarily allocate in the heap */
 VALUE val = Data_Wrap_Struct(data_class, mark_f, free_f, ptr);
-
+``` `data_class` is the class that `val` belongs to, `ptr` is the pointer to be wrapped. @@ -1998,7 +2046,7 @@ Let's also look at the content of `Data_Wrap_Struct()`.

▼ `Data_Wrap_Struct()`

-
+```TODO-lang
  369  #define Data_Wrap_Struct(klass, mark, free, sval) \
  370      rb_data_object_alloc(klass, sval,             \
                                (RUBY_DATA_FUNC)mark,    \
@@ -2007,7 +2055,7 @@ Let's also look at the content of `Data_Wrap_Struct()`.
  365  typedef void (*RUBY_DATA_FUNC) _((void*));
 
 (ruby.h)
-
+``` Most of it is delegated to `rb_object_alloc()`. @@ -2015,7 +2063,7 @@ Most of it is delegated to `rb_object_alloc()`.

▼ `rb_data_object_alloc()`

-
+```TODO-lang
  310  VALUE
  311  rb_data_object_alloc(klass, datap, dmark, dfree)
  312      VALUE klass;
@@ -2033,7 +2081,7 @@ Most of it is delegated to `rb_object_alloc()`.
  324  }
 
 (gc.c)
-
+``` This is not complicated. As the same as the ordinary objects, it prepares a @@ -2049,7 +2097,7 @@ and defining the function on a class by `rb_define_singleton_method()`. -h4. `Data_Get_Struct()` +#### `Data_Get_Struct()` The next thing is `initialize`. Not only for `initialize`, the methods need a @@ -2059,7 +2107,7 @@ to do it, you can use the `Data_Get_Struct()` macro.

▼ `Data_Get_Struct()`

-
+```TODO-lang
  378  #define Data_Get_Struct(obj,type,sval) do {\
  379      Check_Type(obj, T_DATA); \
  380      sval = (type*)DATA_PTR(obj);\
@@ -2068,7 +2116,7 @@ to do it, you can use the `Data_Get_Struct()` macro.
  360  #define DATA_PTR(dta) (RDATA(dta)->data)
 
 (ruby.h)
-
+``` As you see, it just takes the pointer (to `struct my`) from a member of `RData`. @@ -2077,7 +2125,7 @@ This is simple. `Check_Type()` just checks the struct type. -h3. The Issues of the Allocation Framework +### The Issues of the Allocation Framework So, I've explained innocently until now, but actually the current allocation @@ -2120,9 +2168,9 @@ So, -
+```TODO-lang
 rb_define_allocator(rb_cMy, my_allocate);
-
+``` an alternative like this is currently in discussion. diff --git a/index.md b/index.md new file mode 100644 index 0000000..c25ca24 --- /dev/null +++ b/index.md @@ -0,0 +1,86 @@ +--- +layout: default +--- + +Table of contents +================= + +Some chapters are previews. It means they have not been fully reviewed, +some diagrams may be missing and some sentences may be a little +rough. But it also means they are in open review, so do not hesitate +to address issues. + +* [Preface](preface) +* [Introduction](intro) + +### Part 1: Objects + +* [Chapter 1: A Minimal Introduction to Ruby](minimum) +* [Chapter 2: Objects](object) +* [Chapter 3: Names and name tables](name) +* [Chapter 4: Classes and modules](class) +* [Chapter 5: Garbage collection](gc) +* [Chapter 6: Variables and constants](variable) +* [Chapter 7: Security](security) + +### Part 2: Syntax analysis + +* [Chapter 8: Ruby Language Details](spec) +* [Chapter 9: yacc crash course](yacc) +* [Chapter 10: Parser](parser) +* [Chapter 11: Finite-state scanner](contextual) +* [Chapter 12: Syntax tree construction](syntree) + +### Part 3: Evaluation + +* [Chapter 13: Structure of the evaluator](evaluator) +* [Chapter 14: Context](module) +* [Chapter 15: Methods](method) +* [Chapter 16: Blocks](iterator) +* [Chapter 17: Dynamic evaluation](anyeval) + +### Part 4: Around the evaluator + +* [Chapter 18: Loading](load) +* [Chapter 19: Threads](thread) + +* [Final chapter: Ruby's future - translation unstarted](fin) + +About this Guide +---------------- + +This is a new effort to gather efforts to help translate +[Ruby Hacking Guide](http://i.loveruby.net/ja/rhg/book/) into English. + +The official support site of the original book is +http://i.loveruby.net/ja/rhg/ + +You can download the version of the source code explained and +the tool used in the book +from the official support site of the original book. 
+* [ruby (1.7.3 2002-09-12) in tar.gz format](http://i.loveruby.net/ja/rhg/ar/ruby-rhg.tar.gz) +* [ruby (1.7.3 2002-09-12) in zip format](http://i.loveruby.net/ja/rhg/ar/ruby-rhg.zip) +* [Pragmatic Programmers' nodeDump 0.1.7](http://i.loveruby.net/ja/rhg/ar/nodeDump-0.1.7.tgz) +* [RHG-version nodedump](http://i.loveruby.net/ja/rhg/ar/nodedump-rhg.tar.gz) + +The original translating project is hosted at RubyForge +http://rubyforge.org/projects/rhg + +Many thanks to [RubyForge](http://rubyforge.org) for hosting us and to +Minero AOKI for letting us translate his work. + +You can get further information about this project from +[the archives of rhg-discussion mailing list](http://rubyforge.org/pipermail/rhg-discussion/) + +There is an old SVN repo, that is hosted at +The RubyForge project page is http://rubyforge.org/projects/rhg. +It has been imported here, and I will attempt to give credit and re-write the +SVN/Git history when I can. + +As for now the contributors to that repo were: + +* Vincent ISAMBART +* meinrad recheis +* Laurent Sansonetti +* Clifford Caoile +* Jean-Denis Vauguet diff --git a/index.textile b/index.textile deleted file mode 100644 index c5c3ae0..0000000 --- a/index.textile +++ /dev/null @@ -1,84 +0,0 @@ ---- -layout: default ---- - -h2. Table of contents - -Some chapters are previews. It means they have not been fully reviewed, -some diagrams may be missing and some sentences may be a little -rough. But it also means they are in open review, so do not hesitate -to address issues. - -* "Preface":preface.html -* "Introduction":intro.html - -h3. Part 1: Objects - -* "Chapter 1: A Minimal Introduction to Ruby":minimum.html -* "Chapter 2: Objects":object.html -* "Chapter 3: Names and name tables":name.html -* "Chapter 4: Classes and modules":class.html -* "Chapter 5: Garbage collection":gc.html -* "Chapter 6: Variables and constants":variable.html -* "Chapter 7: Security":security.html - -h3. 
Part 2: Syntax analysis - -* "Chapter 8: Ruby Language Details":spec.html -* "Chapter 9: yacc crash course":yacc.html -* "Chapter 10: Parser":parser.html -* "Chapter 11: Finite-state scanner":contextual.html -* "Chapter 12: Syntax tree construction":syntree.html - -h3. Part 3: Evaluation - -* "Chapter 13: Structure of the evaluator":evaluator.html -* "Chapter 14: Context":module.html -* "Chapter 15: Methods":method.html -* "Chapter 16: Blocks":iterator.html -* "Chapter 17: Dynamic evaluation":anyeval.html - -h3. Part 4: Around the evaluator - -* "Chapter 18: Loading":load.html -* "Chapter 19: Threads":thread.html - -* "Final chapter: Ruby's future - translation unstarted":fin.html - -h1. About this Guide - -This is a new effort to gather efforts to help translate -"Ruby Hacking Guide":http://i.loveruby.net/ja/rhg/book/ into English. - -The official support site of the original book is -http://i.loveruby.net/ja/rhg/ - -You can download the version of the source code explained and -the tool used in the book -from the official support site of the original book. -* "ruby (1.7.3 2002-09-12) in tar.gz format":http://i.loveruby.net/ja/rhg/ar/ruby-rhg.tar.gz -* "ruby (1.7.3 2002-09-12) in zip format":http://i.loveruby.net/ja/rhg/ar/ruby-rhg.zip -* "Pragmatic Programmers' nodeDump 0.1.7":http://i.loveruby.net/ja/rhg/ar/nodeDump-0.1.7.tgz -* "RHG-version nodedump":http://i.loveruby.net/ja/rhg/ar/nodedump-rhg.tar.gz - -The original translating project is hosted at RubyForge -http://rubyforge.org/projects/rhg - -Many thanks to "RubyForge":http://rubyforge.org for hosting us and to -Minero AOKI for letting us translate his work. - -You can get further information about this project from -"the archives of rhg-discussion mailing list":http://rubyforge.org/pipermail/rhg-discussion/ - -There is an old SVN repo, that is hosted at -The RubyForge project page is http://rubyforge.org/projects/rhg. 
-It has been imported here, and I will attempt to give credit and re-write the -SVN/Git history when I can. - -As for now the contributors to that repo were: - -* Vincent ISAMBART -* meinrad recheis -* Laurent Sansonetti -* Clifford Caoile -* Jean-Denis Vauguet diff --git a/intro.textile b/intro.md similarity index 92% rename from intro.textile rename to intro.md index 9048bd9..cbac578 100644 --- a/intro.textile +++ b/intro.md @@ -4,19 +4,21 @@ title: Introduction --- -h1. Introduction +Introduction +------------ -h2. Characteristics of Ruby +Characteristics of Ruby +======================= Some of the readers may have already been familiar with Ruby, but (I hope) there are also many readers who have not. First let's go though a -rough summary of the characteristics of Ruby for such people. +rough summary of the characteristics of Ruby for such people. Hereafter capital "Ruby" refers to Ruby as a language specification, and lowercase "@ruby@" refers to @ruby@ command as an implementation. -h4. Development style +#### Development style Ruby is a language that is being developed by the hand of Yukihiro Matsumoto as an individual. Unlike C or Java or Scheme, it does not have any standard. @@ -41,7 +43,7 @@ following things: * You can redistribute a copy of source code with your modification -There is no need for special permission and payment in all these cases. +There is no need for special permission and payment in all these cases. By the way, the purpose of this book is to read the original @ruby@, @@ -50,7 +52,7 @@ specified. However, white spaces, new lines and comments were added or removed without asking. -h4. It's conservative +#### It's conservative Ruby is a very conservative language. It is equipped with only carefully chosen features that have been tested and washed out in a variety of languages. @@ -67,9 +69,9 @@ been taken from them. For example, @printf@, @getpwent@, @sub@, and @tr@. It is also conservative in implementation. 
Assembler is not its option for seeking speed. Portability is always considered a higher priority when it -conflicts with speed. +conflicts with speed. -h4. It is an object-oriented language +#### It is an object-oriented language Ruby is an object-oriented language. It is absolutely impossible to exclude it from the features of Ruby. @@ -79,12 +81,12 @@ I will not give a page to this book about what an object-oriented language is. To tell about an object-oriented feature about Ruby, the expression of the code that just going to be explained is the exact sample. -h4. It is a script language +#### It is a script language Ruby is a script language. It seems also absolutely impossible to exclude this from the features of Ruby. To gain agreement of everyone, an introduction of Ruby must include "object-oriented" and "script -language". +language". However, what is a "script language" for example? I couldn't figure out the @@ -92,7 +94,7 @@ definition successfully. For example, John K. Ousterhout, the author of Tcl/Tk, gives a definition as "executable language using @#!@ on UNIX". There are other definitions depending on the view points, such as one that can express a useful program with only one line, or that can execute the code by passing a program -file from the command line, etc. +file from the command line, etc. However, I dare to use another definition, because I don't find much interest @@ -100,16 +102,16 @@ in "what" a script language. I have the only one measure to decide to call it a script language, that is, whether no one would complain about calling it a script language. To fulfill this definition, I would define the meaning of "script language" as -follows. +follows. A language that its author calls it a "script language". -I'm sure this definition will have no failure. And Ruby fulfills this point. -Therefore I call Ruby a "script language". +I'm sure this definition will have no failure. And Ruby fulfills this point. 
+Therefore I call Ruby a "script language". -h4. It's an interpreter +#### It's an interpreter @ruby@ is an interpreter. That's the fact. But why it's an interpreter? For example, couldn't it be made as a compiler? @@ -135,7 +137,7 @@ accurate. A language could possibly be planned so that it won't show the process of compilation. Actually, Delphi can compile a project by hitting just F5. A claim about a long time for compilation is derived from the size of the project or optimization of the codes. Compilation itself doesn't owe a negative -side. +side. Well, why people perceive an interpreter and compiler so much different like @@ -160,14 +162,14 @@ book is facing, so I emphasize it here again. Though I don't know about "it's handy because it is an interpreter", anyway @ruby@ is implemented as an interpreter. -h4. High portability +#### High portability Even with a problem that fundamentally the interfaces are Unix-centered, I would insist @ruby@ possesses a high portability. It doesn't require any extremely unfamiliar library. It has only a few parts written in assembler. Therefore porting to a new platform is comparatively easy. Namely, it works -on the following platforms currently. +on the following platforms currently. * Linux @@ -197,7 +199,7 @@ Thus when using Linux, you will not fail to compile any time. Furthermore, you can expect a stable functionality on a (typical) Unix environment. Considering the release cycle of packages, the primary option for the environment to hit around @ruby@ should fall on a branch of PC UNIX, - currently. + currently. On the other hand, the Win32 environment tends to cause problems definitely. @@ -205,25 +207,25 @@ The large gaps in the targeting OS model tend to cause problems around the machine stack and the linker. Yet, recently Windows hackers have contributed to make better support. I use a native ruby on Windows 2000 and Me. Once it gets successfully run, it doesn't seem to show special concerns like frequent -crashing. 
The main problems on Windows may be the gaps in the specifications. +crashing. The main problems on Windows may be the gaps in the specifications. Another type of OS that many people may be interested in should probably be Mac -OS (prior to v9) and handheld OS like Palm. +OS (prior to v9) and handheld OS like Palm. Around @ruby 1.2@ and before, it supported legacy Mac OS, but the development seems to be in suspension. Even a compiling can't get through. The biggest cause is that the compiler environment of legacy Mac OS and the decrease of developers. Talking about Mac OS X, there's no worries because the body is -UNIX. +UNIX. There seem to be discussions the portability to Palm several branches, but I have never heard of a successful project. I guess the difficulty lies in the necessity of settling down the specification-level standards such as @stdio@ on the Palm platform, rather than the processes of actual implementation. Well I -saw a porting to Psion has been done. ([ruby-list:36028]). +saw a porting to Psion has been done. ([ruby-list:36028]). How about hot stories about VM seen in Java and .NET? @@ -231,49 +233,49 @@ Because I'd like to talk about them combining together with the implementation, this topic will be in the final chapter. -h4. Automatic memory control +#### Automatic memory control Functionally it's called GC, or Garbage Collection. Saying it in C-language, this feature allows you to skip @free()@ after @malloc()@. Unused memory is detected by the system automatically, and will be released. It's so convenient that once you get used to GC you won't be willing to do such manual -memory control again. +memory control again. The topics about GC have been common because of its popularity in recent languages with GC as a standard set, and it is fun that its algorithms can still be improved further. -h4. Typeless variables +#### Typeless variables The variables in Ruby don't have types. 
The reason is probably typeless variables conforms more with polymorphism, which is one of the strongest advantages of an object-oriented language. Of course a language with variable type has a way to deal with polymorphism. What I mean here is a typeless -variables have better conformance. +variables have better conformance. The level of "better conformance" in this case refers to synonyms like "handy". It's sometimes corresponds to crucial importance, sometimes it doesn't matter practically. Yet, this is certainly an appealing point if a language seeks for -"handy and easy", and Ruby does. +"handy and easy", and Ruby does. -h4. Most of syntactic elements are expressions +#### Most of syntactic elements are expressions This topic is probably difficult to understand instantly without a little supplemental explanation. For example, the following C-language program results in a syntactic error. -
 
+```TODO-lang
 result = if (cond) { process(val); } else { 0; }
-
+``` Because the C-language syntax defines @if@ as a statement. But you can write it as follows. -
 
+```TODO-lang
 result = cond ? process(val) : 0;
-
+``` This rewrite is possible because the conditional operator (@a?b:c@) is defined as an expression. @@ -281,9 +283,9 @@ as an expression. On the other hand, in Ruby, you can write as follows because @if@ is an expression. -
 
+```TODO-lang
 result = if cond then process(val) else nil end
-
+``` Roughly speaking, if it can be an argument of a function or a method, you can consider it as an expression. @@ -292,12 +294,12 @@ Of course, there are other languages whose syntactic elements are mostly express Lisp is the best example. Because of the characteristic around this, there seems many people who feel like "Ruby is similar to Lisp". -h4. Iterators +#### Iterators Ruby has iterators. What is an iterator? Before getting into iterators, I should mention the necessity of using an alternative term, because the word "iterator" is disliked recently. However, I don't have a good alternative. So let us keep calling -it "iterator" for the time being. +it "iterator" for the time being. Well again, what is an iterator? If you know higher-order function, for the time being, you can regard it as something similar to it. @@ -308,9 +310,9 @@ it's good to imagine something like a custom @for@ statement which we can define Yet, the above are merely examples of "similar" concepts. All of them are similar, but they are not identical to Ruby's iterator. I will expand the precise story -when it's a good time later. +when it's a good time later. -h4. Written in C-language +#### Written in C-language Being written in C-language is not notable these days, but it's still a characteristic for sure. @@ -335,10 +337,10 @@ it would warn prototype mismatch and could not compile. ... These kind of stories are often reported to the mailing list. -h4. Extension library +#### Extension library We can write a Ruby library in C and load it at runtime without recompiling Ruby. -This type of library is called "Ruby extension library" or just "Extension library". +This type of library is called "Ruby extension library" or just "Extension library". Not only the fact that we can write it in C, but the very small difference in the code expression between Ruby-level and @@ -347,7 +349,7 @@ As for the operations available in Ruby, we can also use them in C in the almost same way. 
See the following example. -
 
+```TODO-lang
 # Method call
 obj.method(arg)                                 # Ruby
 rb_funcall(obj, rb_intern("method"), 1, arg);   # C
@@ -363,17 +365,17 @@ rb_raise(rb_eArgError, "wrong number of arguments");   # C
 # Generating an object
 arr = Array.new             # Ruby
 VALUE arr = rb_ary_new();   # C
-
+``` It's good because it provides easiness in composing an extension library, and actually it makes an indispensable prominence of @ruby@. However, it's also a burden for @ruby@ implementation. You can see the affects of it in many places. The affects to GC and -thread-processing is eminent. +thread-processing is eminent. -h4. Thread +#### Thread Ruby is equipped with thread. Assuming a very few people knowing none about thread these -days, I will omit an explanation about the thread itself. I will start a story in detail. +days, I will omit an explanation about the thread itself. I will start a story in detail. `ruby`'s thread is a user-level thread that is originally written. The characteristic of this implementation is a very high portability in both specification and implementation. @@ -384,9 +386,10 @@ However, as a trade off for such an extremeness of portability, @ruby@ abandons It's, say, probably the slowest of all user-level thread implementations in this world. The tendency of @ruby@ implementation may be seen here the most clearly. -h2. Technique to read source code +Technique to read source code +============================= -Well. After an introduction of @ruby@, we are about to start reading source code. But wait. +Well. After an introduction of @ruby@, we are about to start reading source code. But wait. Any programmer has to read a source code somewhere, but I guess there are not many occasions that someone teaches you the concrete ways how to read. Why? @@ -395,17 +398,17 @@ Does it mean you can naturally read a program if you can write a program? But I can't think reading the program written by other people is so easy. In the same way as writing programs, there must be techniques and theories in reading programs. And they are necessary. Therefore, before starting to ready @ruby@, I'd like to expand a general -summary of an approach you need to take in reading a source code. +summary of an approach you need to take in reading a source code. -h3. 
Principles +### Principles -At first, I mention the principle. +At first, I mention the principle. -h4. Decide a goal +#### Decide a goal
-An important key to reading the source code is to set a concrete goal. +An important key to reading the source code is to set a concrete goal.
@@ -415,23 +418,23 @@ you would get source code expanded or explanatory books ready on the desk. But n what to do, the studies are to be left untouched. Haven't you? On the other hand, when you have in mind "I'm sure there is a bug somewhere in this tool. I need to quickly fix it and make it work. Otherwise I will not be able to make the deadline...", you will probably be -able to fix the code in a blink, even if it's written by someone else. Haven't you? +able to fix the code in a blink, even if it's written by someone else. Haven't you? The difference in these two cases is motivation you have. In order to know something, you at least have to know what you want to know. Therefore, the first step of all is -to figure out what you want to know in explicit words. +to figure out what you want to know in explicit words. However, of course this is not all needed to make it your own "technique". Because "technique" needs to be a common method that anybody can make use of it by following it. In the following section, I will explain how to bring the first step into the landing place -where you achieve the goal finally. +where you achieve the goal finally. -h4. Visualising the goal +#### Visualising the goal Now let us suppose that our final goal is set "Understand all about @ruby@". This is certainly considered as "one set goal", but apparently it will not be useful for reading the source code actually. It will not be a trigger of any concrete action. Therefore, your first job will be to -drag down the vague goal to the level of a concrete thing. +drag down the vague goal to the level of a concrete thing. Then how can we do it? The first way is thinking as if you are the person who wrote the program. You can utilize your knowledge in writing a program, in this case. For example, @@ -448,36 +451,36 @@ but they are often done relying on experience or intuition. In what way can we read source codes well? 
Thinking about the way itself and being aware of it are crucially important. -Well, what are such methods like? I will explain it in the next section. +Well, what are such methods like? I will explain it in the next section. -h3. Analysis methods +### Analysis methods The methods to read source code can be roughly divided into two; one is a static method and the other is dynamic method. Static method is to read and analyze the source code without -running the program. Dynamic method is to watch the actual behavior using tools like a debugger. +running the program. Dynamic method is to watch the actual behavior using tools like a debugger. It's better to start studying a program by dynamic analysis. That is because what you can see there is the "fact". The results from static analysis, due to the fact of not running the program actually, may well be "prediction" to a greater or lesser extent. If you want to know the truth, -you should start from watching the fact. +you should start from watching the fact. Of course, you don't know whether the results of dynamic analysis are the fact really. The debugger could run with a bug, or the CPU may not be working properly due to overheat. The conditions of your configuration could be wrong. However, the results of static analysis -should at least be closer to the fact than dynamic analysis. +should at least be closer to the fact than dynamic analysis. -h3. Dynamic analysis +### Dynamic analysis -h4. Using the target program +#### Using the target program You can't start without the target program. First of all, you need to know in advance what -the program is like, and what are expected behaviors. +the program is like, and what are expected behaviors. -h4. 
Following the behavior using the debugger +#### Following the behavior using the debugger If you want to see the paths of code execution and the data structure produced as a result, it's quicker to look at the result by running the program actually than to emulate the behavior -in your brain. In order to do so easily, use the debugger. +in your brain. In order to do so easily, use the debugger. I would be more happy if the data structure at runtime can be seen as a picture, but unfortunately we can nearly scarcely find a tool for that purpose @@ -485,20 +488,20 @@ but unfortunately we can nearly scarcely find a tool for that purpose If it is about a snapshot of the comparatively simpler structure, we might be able to write it out as a text and convert it to a picture by using a tool like graphviz\footnote{graphviz……See doc/graphviz.html in the attached CD-ROM}. -But it's very difficult to find a way for general purpose and real time analysis. +But it's very difficult to find a way for general purpose and real time analysis. -h4. Tracer +#### Tracer -You can use the tracer if you want to trace the procedures that code goes through. In case of C-language, there is a tool named ctrace\footnote{ctrace……http://www.vicente.org/ctrace}. For tracing a system call, you can use tools like strace\footnote{strace……http://www.wi.leidenuniv.nl/~wichert/strace/}, truss, and ktrace. +You can use the tracer if you want to trace the procedures that code goes through. In case of C-language, there is a tool named ctrace\footnote{ctrace……http://www.vicente.org/ctrace}. For tracing a system call, you can use tools like strace\footnote{strace……http://www.wi.leidenuniv.nl/~wichert/strace/}, truss, and ktrace. -h4. Print everywhere +#### Print everywhere There is a word "printf debugging". This method also works for analysis other than debugging. 
If you are watching the history of one variable, for example, it may be easier to understand to look at the dump of the result of the print statements embed, than to track the variable with a debugger. -h4. Modifying the code and running it +#### Modifying the code and running it Say for example, in the place where it's not easy to understand its behavior, just make a small change in some part of the code or a particular parameter @@ -509,9 +512,9 @@ meaning of the code from it. It goes without saying, you should also have an original binary and do the same thing on both of them. -h3. Static analysis +### Static analysis -h4. The importance of names +#### The importance of names Static analysis is simply source code analysis. And source code analysis is really an analysis of names. File names, function names, variable names, type names, @@ -527,17 +530,17 @@ And in object-oriented programs, function names sometimes contain the information about where they belong to in prefixes, and it becomes valuable information (e.g. @rb_str_length@). -h4. Reading documents +#### Reading documents Sometimes a document describes the internal structure is included. Especially be careful of a file named @HACKING@ etc. -h4. Reading the directory structure +#### Reading the directory structure Looking at in what policy the directories are divided. Grasping the overview such as how the program is structured, and what the parts are. -h4. Reading the file structure +#### Reading the file structure While browsing (the names of) the functions, also looking at the policy of how the files are divided. @@ -549,7 +552,7 @@ for each module the functions to compose it should be grouped together, so you can find out the module structure from the order of the functions. -h4. Investigating abbreviations +#### Investigating abbreviations As you encounter ambiguous abbreviations, make a list of them and investigate each of them as early as possible. 
For example, when it is written "GC", @@ -562,7 +565,7 @@ popular abbreviations in the fields of the target program are used unconditionally, thus you should be familiar with them at an early stage. -h4. Understanding data structure +#### Understanding data structure If you find both data and code, you should first investigate the data structure. In other words, when exploring code in C, it's better to start with header files. @@ -574,7 +577,7 @@ For example, if you find the member @next@, which points to its own type, then i will be a linked list. Similarly, when you find members such as @parent@, @children@, and @sibling@, then it must be a tree structure. When @prev@, it will be a stack. -h4. Understanding the calling relationship between functions +#### Understanding the calling relationship between functions After names, the next most important thing to understand is the relationships between functions. A tool to visualize the calling relationships is especially called a @@ -587,7 +590,7 @@ When I analyzed @ruby@ to write this book, I wrote a small command language and a parser in Ruby and generated diagrams half-automatically by passing the results to the tool named @graphviz@. -h4. Reading functions +#### Reading functions Reading how it works to be able to explain things done by the function concisely. It's good to read it part by part as looking at the figure of the function @@ -602,7 +605,7 @@ Additionally, when you don't like its coding style, you can convert it by using the tool like @indent@. -h4. Experimenting by modifying it as you like +#### Experimenting by modifying it as you like It's a mystery of human body, when something is done using a lot of parts of your body, @@ -626,7 +629,7 @@ mistake. And since the purpose of rewriting is getting used to and not rewriting itself, please be careful not to be enthusiastic very much. -h3. 
Reading the history +### Reading the history A program often comes with a document which is about the history of changes. For example, if it is a software of GNU, there's always a file named @@ -645,7 +648,7 @@ time because often there's the information about the exact reason of a certain change. Of course, if you can search online, it's also sufficient. -h3. The tools for static analysis +### The tools for static analysis Since various tools are available for various purposes, I can't describe them as a whole. @@ -655,7 +658,7 @@ the other purposes. For instance, @gctags@, which comes with it, is actually a tool to create tag files, but you can use it to create a list of the function names contained in a file. -
+```TODO-lang
 ~/src/ruby % gctags class.c | awk '{print $1}'
 SPECIAL_SINGLETON
 SPECIAL_SINGLETON
@@ -667,7 +670,7 @@ ins_methods_prot_i
 method_list
         :
         :
-</pre>
+``` That said, but this is just a recommendation of this author, you as a reader can use whichever tool you like. @@ -683,9 +686,10 @@ features. -h2. Build +Build +===== -h3. Target version +### Target version The version of @ruby@ described in this book is 1.7 (2002-09-12). Regarding @ruby@, @@ -709,32 +713,32 @@ out in the near future. And the last one is, investigating the edge would make our mood more pleasant. -h3. Getting the source code +### Getting the source code The archive of the target version is included in the attached CD-ROM. In the top directory of the CD-ROM, -
+```TODO-lang
 ruby-rhg.tar.gz
 ruby-rhg.zip
 ruby-rhg.lzh
-</pre>
+``` these three versions are placed, so I'd like you to use whichever one that is convenient for you. Of course, whichever one you choose, the content is the same. For example, the archive of @tar.gz@ can be extracted as follows. -
+```TODO-lang
 ~/src % mount /mnt/cdrom
 ~/src % gzip -dc /mnt/cdrom/ruby-rhg.tar.gz | tar xf -
 ~/src % umount /mnt/cdrom
-</pre>
+``` -h3. Compiling +### Compiling Just by looking at the source code, you can "read" it. But in order to know about the program, you need to actually use it, remodel it @@ -751,18 +755,18 @@ However, Cygwin is on Windows but almost Unix, thus I'd like you to read this section for it. -h4. Building on a Unix-like OS +#### Building on a Unix-like OS When it is a Unix-like OS, because generally it is equipped with a C compiler, by following the below procedures, it can pass in most cases. Let us suppose @~/src/ruby@ is the place where the source code is extracted. -
+```TODO-lang
 ~/src/ruby % ./configure
 ~/src/ruby % make
 ~/src/ruby % su
 ~/src/ruby # make install
-</pre>
+``` Below, I'll describe several points to be careful about. @@ -773,9 +777,9 @@ or you'd fail to link. @--enable-shared@ is an option to put the most of @ruby@ out of the command as shared libraries (@libruby.so@). -
+```TODO-lang
 ~/src/ruby % ./configure --enable-shared
-</pre>
+``` The detailed tutorial about building is included in @doc/build.html@ of the attached CD-ROM, I'd like you to try as reading it. @@ -783,7 +787,7 @@ attached CD-ROM, I'd like you to try as reading it. -h4. Building on Windows +#### Building on Windows If the thing is to build on windows, it becomes way complicated. @@ -836,7 +840,7 @@ thus I'd like you to check it when it is necessary. -h4. Visual C++ +#### Visual C++ It is said Visual C++, but usually IDE is not used, we'll build from DOS prompt. In this case, first we need to initialize environment variables to be able to @@ -844,31 +848,31 @@ run Visual C++ itself. Since a batch file for this purpose came with Visual C++, let's execute it first. -
+```TODO-lang
 C:\> cd "\Program Files\Microsoft Visual Studio .NET\Vc7\bin"
 C:\Program Files\Microsoft Visual Studio .NET\Vc7\bin> vcvars32
-</pre>
+``` This is the case of Visual C++ .NET. If it is version 6, it can be found in the following place. -
+```TODO-lang
 C:\Program Files\Microsoft Visual Studio\VC98\bin\
-</pre>
+``` After executing @vcvars32@, all you have to do is to move to the @win32\@ folder of the source tree of @ruby@ and build. Below, let us suppose the source tree is in @C:\src@. -
+```TODO-lang
 C:\> cd src\ruby
 C:\src\ruby> cd win32
 C:\src\ruby\win32> configure
 C:\src\ruby\win32> nmake
 C:\src\ruby\win32> nmake DESTDIR="C:\Program Files\ruby" install
-</pre>
+``` Then, @ruby@ command would be installed in @C:\Program Files\ruby\bin\@, @@ -877,7 +881,7 @@ Because @ruby@ does not use registries and such at all, you can uninstall it by deleting @C:\Program Files\ruby@ and below. -h4. MinGW +#### MinGW As described before, MinGW is only an environment to compile, thus the general UNIX tools like @sed@ or @sh@ are not available. @@ -896,12 +900,12 @@ Cygwin. Both Cygwin and MinGW are also included in the attached CD-ROM. \footnote{Cygwin and MinGW……See also doc/win.html of the attached CD-ROM} After that, all you have to do is to type as follows from @bash@ prompt of Cygwin. -
+```TODO-lang
 ~/src/ruby % ./configure --with-gcc='gcc -mno-cygwin' \
                                  --enable-shared i386-mingw32
 ~/src/ruby % make
 ~/src/ruby % make install
-</pre>
+``` That's it. Here the line of @configure@ spans multi-lines but in practice we'd write it on one line and the backslash is not necessary. @@ -913,7 +917,8 @@ the attached CD-ROM. -h2. Building Details +Building Details +================ Until here, it has been the @README@-like description. This time, let's look at exactly what is done by what we have been done. @@ -928,7 +933,7 @@ As considering the explanation about @make install@ unnecessary, I'll explain the @configure@ phase and the @make@ phase. -h3. @configure@ +### @configure@ First, @configure@. Its content is a shell script, and we detect the system parameters by using it. For example, "whether there's the header file @@ -948,22 +953,22 @@ If we put a @Makefile.in@ in which parameters are embedded in the form of with the actual values. For example, as follows, -
+```TODO-lang
 Makefile.in:  CFLAGS = @CFLAGS@
                      ↓
 Makefile   :  CFLAGS = -g -O2
-</pre>
+``` Alternatively, it writes out the information about, for instance, whether -there are certain functions or particular header files, into a header file. +there are certain functions or particular header files, into a header file. Because the output file name can be changed, it is different depending on each program, but it is @config.h@ in @ruby@. I'd like you to confirm this file is created after executing @configure@. Its content is something like this.

config.h

-
+```TODO-lang
          :
          :
 #define HAVE_SYS_STAT_H 1
@@ -981,7 +986,7 @@ Its content is something like this.
 #define SIZEOF_SHORT 2
          :
          :
-</pre>
+``` Each meaning is easy to understand. @@ -997,19 +1002,19 @@ differences. Bridging the difference is left to each programmer. For example, as follows, ▼ A typical usage of the `HAVE_` macro -
+```TODO-lang
   24  #ifdef HAVE_STDLIB_H
   25  # include <stdlib.h>
   26  #endif
 
 (ruby.h)
-</pre>
+``` -h3. @autoconf@ +### @autoconf@ @configure@ is not a `ruby`-specific tool. Whether there are functions, there are header files, ... @@ -1028,7 +1033,10 @@ It's the same as the relationship between @Makefile@ and @Makefile.in@. To illustrate this talk up until here, it would be like Figure 1. -!images/ch_abstract_build.jpg(The process until @Makefile@ is created)! +
+ figure 1: The process until @Makefile@ is created +
figure 1: The process until @Makefile@ is created
+
For the readers who want to know more details, I recommend "GNU Autoconf/Automake/Libtool" Gary V.Vaughan, Ben Elliston, Tom @@ -1041,7 +1049,7 @@ can be used. Anyway, it's sufficient if ultimately there are @Makefile@ and @config.h@ and many others. -h3. @make@ +### @make@ At the second phase, @make@, what is done? Of course, it would compile the source code of @ruby@, @@ -1049,12 +1057,12 @@ but when looking at the output of @make@, I feel like there are many other things it does. I'll briefly explain the process of it. -# compile the source code composing @ruby@ itself -# create the static library @libruby.a@ gathering the crucial parts of @ruby@ -# create "@miniruby@", which is an always statically-linked @ruby@ -# create the shared library @libruby.so@ when @--enable-shared@ -# compile the extension libraries (under @ext/@) by using @miniurby@ -# At last, generate the real @ruby@ +* compile the source code composing @ruby@ itself +* create the static library @libruby.a@ gathering the crucial parts of @ruby@ +* create "@miniruby@", which is an always statically-linked @ruby@ +* create the shared library @libruby.so@ when @--enable-shared@ +* compile the extension libraries (under @ext/@) by using @miniruby@ +* At last, generate the real @ruby@ There are two reasons why it creates @miniruby@ and @ruby@ separately. The first one is that compiling the extension libraries requires @ruby@. @@ -1072,7 +1080,8 @@ In order to resolve this dilemma, it uses @miniruby@. -h2. @CVS@ +@CVS@ +===== The @ruby@ archive included in the attached CD-ROM is, as the same as the official release package, @@ -1083,7 +1092,7 @@ there. Then what is the way to see the entire picture including the past. We can do it by using CVS. -h3. About CVS +### About CVS CVS is shortly an undo list of editors. If the source code is under the management of CVS, @@ -1116,7 +1125,10 @@ There's only one repository, but you can have multiple working copies. 
(Figure 2) -!images/ch_abstract_repo.jpg(Repository and working copies)! +
+ figure 2: Repository and working copies +
figure 2: Repository and working copies
+
When you'd like to modify the source code, @@ -1128,7 +1140,10 @@ returning is called "checkin" or "commit" (Figure 3). By checking in, the change is recorded to the repository, then we can obtain it any time. -!images/ch_abstract_ci.jpg(Checkin and Checkout)! +
+ figure 3: Checkin and Checkout +
figure 3: Checkin and Checkout
+
The biggest trait of CVS is we can access it over the networks. @@ -1137,7 +1152,7 @@ everyone can checkin/checkout over the internet any time. But generally the access to check in is restricted and we can't do it freely. -h4. Revision +#### Revision How can we do to obtain a certain version from the repository? One way is to specify with time. By requiring "give me the edge version of that @@ -1158,7 +1173,7 @@ Next it would be 1.3 then 1.4. -h4. A simple usage example of CVS +#### A simple usage example of CVS Keeping in mind the above things, I'll talk about the usage of CVS very very briefly. @@ -1171,11 +1186,11 @@ thus it won't be explained here. After installing it, let's checkout the source code of @ruby@ as an experiment. Type the following commands when you are online. -
+```TODO-lang
 % cvs -d :pserver:anonymous@cvs.ruby-lang.org:/src login
 CVS Password: anonymous
 % cvs -d :pserver:anonymous@cvs.ruby-lang.org:/src checkout ruby
-</pre>
+``` Any options were not specified, @@ -1187,9 +1202,9 @@ you can use @-D@ option of @cvs checkout@. By typing as follows, you can obtain a working copy of the version which is being explained by this book. -
+```TODO-lang
 % cvs -d :pserver:anonymous@cvs.ruby-lang.org:/src checkout -D2002-09-12 ruby
-</pre>
+``` At this moment, you have to write options immediately after @checkout@. If you wrote "@ruby@" first, it would cause a strange error complaining "missing @@ -1206,9 +1221,10 @@ I recommend translated "Open Source Development with CVS" Karl Fogel, Moshe Bar. -h2. The composition of @ruby@ +The composition of @ruby@ +========================= -h3. The physical structure +### The physical structure Now it is time to start to read the source code, but what is the thing we should do first? @@ -1225,7 +1241,7 @@ the CVS repository. What end with a slash are subdirectories. -
+```TODO-lang
 COPYING        compar.c       gc.c           numeric.c      sample/
 COPYING.ja     config.guess   hash.c         object.c       signal.c
 CVS/           config.sub     inits.c        pack.c         sprintf.c
@@ -1244,7 +1260,7 @@ array.c        error.c        missing/       ruby.h         win32/
 bcc32/         eval.c         missing.h      rubyio.h       x68/
 bignum.c       ext/           mkconfig.rb    rubysig.h
 class.c        file.c         node.h         rubytest.rb
-</pre>
+``` Recently the size of a program itself has become larger, @@ -1281,14 +1297,14 @@ are also written. -h3. Dissecting Source Code +### Dissecting Source Code From now on, I'll further split the source code of @ruby@ itself into more tiny pieces. As for the main files, its categorization is described in @README.EXT@, thus I'll follow it. Regarding what is not described, I categorized it by myself. -h4. Ruby Language Core +#### Ruby Language Core | @class.c@ | class relating API | | @error.c@ | exception relating API | @@ -1310,12 +1326,12 @@ can be used from extension libraries. | The parts to compose the core of the @ruby@ interpreter. The most of the files which will be explained in this book are contained here. If you consider the number of the files of the entire @ruby@, -it is really only a few. But if you think based on the byte size, +it is really only a few. But if you think based on the byte size, 50% of the entire amount is occupied by these files. Especially, @eval.c@ is 200KB, @parse.y@ is 100KB, these files are large. -h4. Utility +#### Utility | dln.c | dynamic loader | | regex.c | regular expression engine | @@ -1328,7 +1344,7 @@ However, some of them are so large that you cannot imagine it from the word -h4. Implementation of @ruby@ command +#### Implementation of @ruby@ command | @dmyext.c@ | dummy of the routine to initialize extension libraries ( DumMY EXTension ) | | @inits.c@ | the entry point for core and the routine to initialize @@ -1350,7 +1366,7 @@ These commands are functioning by linking to the @libruby@ library -h4. Class Libraries +#### Class Libraries | @array.c@ | @class Array@ | | @bignum.c@ | @class Bignum@ | @@ -1382,7 +1398,7 @@ examples of how to write an extension library. -h4. Files depending on a particular platform +#### Files depending on a particular platform | @bcc32/@ | Borland C++ (Win32) | | @beos/@ | BeOS | @@ -1395,11 +1411,11 @@ h4. Files depending on a particular platform Each platform-specific code is stored. 
-h4. fallback functions +#### fallback functions -
+```TODO-lang
 missing/
-</pre>
+``` Files to offset the functions which are missing on each platform. Mainly functions of @libc@. @@ -1408,7 +1424,7 @@ Mainly functions of @libc@. -h3. Logical Structure +### Logical Structure Now, there are the above four groups and the core can be divided further into three: @@ -1421,7 +1437,7 @@ and evaluator actuates the program. Let me explain them in order. -h4. Object Space +#### Object Space The first one is object space. This is very easy to understand. It is because all of what dealt with by this are basically on the memory, @@ -1430,7 +1446,7 @@ Therefore, in this book, the explanation will start with this part. Part 1 is from chapter 2 to chapter 7. -h4. Parser +#### Parser The second one is parser. Probably some preliminary explanations are necessary for this. @@ -1450,14 +1466,17 @@ The internal expression is called "syntax tree". Syntax tree expresses a program by a tree structure, for instance, figure 4 shows how an @if@ statement is expressed. -!images/ch_abstract_syntree.jpg(an @if@ statement and its corresponding syntax tree)! +
+ figure 4: an @if@ statement and its corresponding syntax tree +
figure 4: an @if@ statement and its corresponding syntax tree
+
Parser will be described in Part 2 "Syntactic Analysis". Part 2 is from chapter 10 to chapter 12. Its target file is only @parse.y@. -h4. Evaluator +#### Evaluator Objects are easy to understand because they are tangible. Also regarding parser, diff --git a/iterator.textile b/iterator.md similarity index 93% rename from iterator.textile rename to iterator.md index 51f8b48..d27a680 100644 --- a/iterator.textile +++ b/iterator.md @@ -3,9 +3,11 @@ layout: default title: "Chapter 16: Blocks" --- -h1. Chapter 16: Blocks +Chapter 16: Blocks +------------------ -h2. Iterator +Iterator +======== In this chapter, `BLOCK`, which is the last big name among the seven Ruby stacks, @@ -14,7 +16,7 @@ After finishing this, the internal state of the evaluator is virtually understoo -h3. The Whole Picture +### The Whole Picture What is the mechanism of iterators? @@ -23,11 +25,11 @@ First, let's think about a small program as below:

▼The Source Program

-
+```TODO-lang
 iter_method() do
   9   # a mark to find this block
 end
-</pre>
+``` Let's check the terms just in case. @@ -38,7 +40,7 @@ Here is the syntax tree of this program being dumped.

▼Its Syntax Tree

-
+```TODO-lang
 NODE_ITER
 nd_iter:
     NODE_FCALL
@@ -48,7 +50,7 @@ nd_var = (null)
 nd_body:
     NODE_LIT
     nd_lit = 9:Fixnum
-</pre>
+``` @@ -65,16 +67,16 @@ I found that the invocation of an iterator is separated into 3 steps: `NODE_ITER NODE_CALL` and `NODE_YIELD`. This means, -#1 push a block (`NODE_ITER`) -#2 call the method which is an iterator (`NODE_CALL`) -#3 `yield` (`NODE_YEILD`) +* push a block (`NODE_ITER`) +* call the method which is an iterator (`NODE_CALL`) +* `yield` (`NODE_YIELD`) that's all. -h3. Push a block +### Push a block First, let's start with the first step, that is `NODE_ITER`, which is the node @@ -83,7 +85,7 @@ to push a block.

▼ `rb_eval()` − `NODE_ITER` (simplified)

-
+```TODO-lang
 case NODE_ITER:
   {
     iter_retry:
@@ -122,7 +124,7 @@ case NODE_ITER:
       }
   }
   break;
-</pre>
+``` Since the original code contains the support of the `for` statement, it is @@ -143,7 +145,10 @@ the existence of a block does not mean the block is pushed for that method. It's possible that the block is pushed for the previous method. (Figure 1) -!images/ch_iterator_stacks.jpg(no one-to-one correspondence between `FRAME` and `BLOCK`)! +
+ figure 1: no one-to-one correspondence between `FRAME` and `BLOCK` +
figure 1: no one-to-one correspondence between `FRAME` and `BLOCK`
+
So, in order to determine for which method the block is pushed, `ITER` is used. @@ -155,7 +160,7 @@ let's check it in practice. -h4. `PUSH_BLOCK()` +#### `PUSH_BLOCK()` The argument of `PUSH_BLOCK()` is (the syntax tree of) the block parameter and @@ -164,7 +169,7 @@ the block body.

▼ `PUSH_BLOCK() POP_BLOCK()`

-
+```TODO-lang
  592  #define PUSH_BLOCK(v,b) do { \
  593      struct BLOCK _block;                  \
  594      _block.tag = new_blktag();            \
@@ -192,7 +197,7 @@ the block body.
  616  } while (0)
 
 (eval.c)
-</pre>
+``` Let's make sure that a `BLOCK` is "the snapshot of the environment of the moment @@ -220,12 +225,12 @@ But let's defer the final judge until after looking at and comparing with `PUSH_ -h4. `PUSH_ITER()` +#### `PUSH_ITER()`

▼ `PUSH_ITER() POP_ITER()`

-
+```TODO-lang
  773  #define PUSH_ITER(i) do {               \
  774      struct iter _iter;                  \
  775      _iter.prev = ruby_iter;             \
@@ -237,7 +242,7 @@ h4. `PUSH_ITER()`
  781  } while (0)
 
 (eval.c)
-</pre>
+``` On the contrary, this is apparently light. @@ -248,7 +253,7 @@ it would probably matter little. -h3. Iterator Method Call +### Iterator Method Call After pushing a block, the next thing is to call an iterator method (a method @@ -260,7 +265,7 @@ Here.

▼ `rb_call0()` − moving to `ITER_CUR`

-
+```TODO-lang
 4498      switch (ruby_iter->iter) {
 4499        case ITER_PRE:
 4500          itr = ITER_CUR;
@@ -272,7 +277,7 @@ Here.
 4506      }
 
 (eval.c)
-</pre>
+``` Since `ITER_PRE` is pushed previously at `NODE_TER`, this code makes @@ -281,7 +286,10 @@ At this moment, a method finally "becomes" an iterator. Figure 2 shows the state of the stacks. -!images/ch_iterator_itertrans.jpg(the state of the Ruby stacks on an iterator call.)! +
+ figure 2: the state of the Ruby stacks on an iterator call. +
figure 2: the state of the Ruby stacks on an iterator call.
+
The possible value of `ruby_iter` is not the one of two boolean values @@ -309,12 +317,12 @@ This is where making use of the third step `ITER`. Let's go back a little and try to see it. -h4. `BEGIN_CALLARGS END_CALLARGS` +#### `BEGIN_CALLARGS END_CALLARGS`

▼ `BEGIN_CALLARGS END_CALLARGS`

-
+```TODO-lang
 1812  #define BEGIN_CALLARGS do {\
 1813      struct BLOCK *tmp_block = ruby_block;\
 1814      if (ruby_iter->iter == ITER_PRE) {\
@@ -328,7 +336,7 @@ h4. `BEGIN_CALLARGS END_CALLARGS`
 1822  } while (0)
 
 (eval.c)
-</pre>
+``` When `ruby_iter` is `ITER_PRE`, a `ruby_block` is set aside. @@ -336,18 +344,18 @@ This code is important, for instance, in the below case: -
+```TODO-lang
 obj.m1 { yield }.m2 { nil }
-</pre>
+``` The evaluation order of this expression is: -#1 push the block of `m2` -#2 push the block of `m1` -#3 call the method `m1` -#4 call the method `m2` +* push the block of `m2` +* push the block of `m1` +* call the method `m1` +* call the method `m2` Therefore, if there was not `BEGIN_CALLARGS`, @@ -361,7 +369,7 @@ so there's no problem. -h3. Block Invocation +### Block Invocation The third phase of iterator invocation, it means the last phase, @@ -370,7 +378,7 @@ is block invocation.

▼ `rb_eval()` − `NODE_YIELD`

-
+```TODO-lang
 2579        case NODE_YIELD:
 2580          if (node->nd_stts) {
 2581              result = avalue_to_yvalue(rb_eval(self, node->nd_stts));
@@ -383,7 +391,7 @@ is block invocation.
 2588          break;
 
 (eval.c)
-</pre>
+``` `nd_stts` is the parameter of `yield`. @@ -419,7 +427,7 @@ it becomes very shorter.

▼ `rb_yield_0()` (simplified)

-
+```TODO-lang
 static VALUE
 rb_yield_0(val, self, klass, /* pcall=0 */)
     VALUE val, self, klass;
@@ -468,7 +476,7 @@ rb_yield_0(val, self, klass, /* pcall=0 */)
 
     return result;
 }
-</pre>
+``` As you can see, the most stack frames are replaced with what saved at `ruby_block`. @@ -476,17 +484,17 @@ Things to simple save/restore are easy to understand, so let's see the handling of the other frames we need to be careful about. -h4. `FRAME` +#### `FRAME` -
+```TODO-lang
 struct FRAME frame;
 
 frame = block->frame;     /* copy the entire struct */
 frame.prev = ruby_frame;  /* by these two lines…… */
 ruby_frame = &(frame);    /* ……frame is pushed */
-</pre>
+``` Differing from the other frames, a `FRAME` is not used in the saved state, @@ -494,7 +502,10 @@ but a new `FRAME` is created by duplicating. This would look like Figure 3. -!images/ch_iterator_framepush.jpg(push a copied frame)! +
+ figure 3: push a copied frame +
figure 3: push a copied frame
+
As we've seen the code until here, @@ -504,17 +515,17 @@ When pushing `FRAME`, a new `FRAME` will always be created. -h4. `BLOCK` +#### `BLOCK` -
+```TODO-lang
 block = ruby_block;
          :
 ruby_block = block->prev;
          :
 ruby_block = block;
-</pre>
+``` @@ -551,7 +562,7 @@ Therefore, it is purposefully checked and set aside. -h4. `VARS` +#### `VARS` Come to think of it, @@ -561,7 +572,7 @@ Let's see them here.

▼ `PUSH_VARS() POP_VARS()`

-
+```TODO-lang
  619  #define PUSH_VARS() do { \
  620      struct RVarmap * volatile _old; \
  621      _old = ruby_dyna_vars;          \
@@ -576,7 +587,7 @@ Let's see them here.
  630  } while (0)
 
 (eval.c)
-</pre>
+``` This is also not pushing a new struct, to say "set aside/restore" is closer. @@ -585,9 +596,9 @@ What actually prepares `ruby_dyna_vars` is this line. -
+```TODO-lang
 ruby_dyna_vars = new_dvar(0, 0, block->dyna_vars);
-</pre>
+``` This takes the `dyna_vars` saved in `BLOCK` and sets it. @@ -605,7 +616,7 @@ variable at the current block.

▼ `dvar_asgn_curr()`

-
+```TODO-lang
  737  static inline void
  738  dvar_asgn_curr(id, value)
  739      ID id;
@@ -645,7 +656,7 @@ variable at the current block.
  727  }
 
 (eval.c)
-</pre>
+``` The last `if` statement is to add a variable. @@ -653,7 +664,10 @@ If we focus on there, we can see a link is always pushed in at the "next" to `ruby_dyna_vars`. This means, it would look like Figure 4. -!images/ch_iterator_dynavarseval.jpg(the structure of `ruby_dyna_vars`)! +
+ figure 4: the structure of `ruby_dyna_vars` +
figure 4: the structure of `ruby_dyna_vars`
+
This differs from the case of the parser in one point: @@ -665,12 +679,15 @@ a single straight link. But according to the errata, it was wrong. That part and relevant descriptions are removed.)) -!images/ch_iterator_insert.jpg(The entry cannot be inserted properly.)! +
+ figure 5: The entry cannot be inserted properly. +
figure 5: The entry cannot be inserted properly.
+
-h3. Target Specified Jump +### Target Specified Jump The code relates to jump tags are omitted in the previously shown code, @@ -680,12 +697,12 @@ I'll tell the reason in advance. I'd like you to see the below program: -
+```TODO-lang
 [0].each do
   break
 end
 # the place to reach by break
-</pre>
+``` like this way, in the case when doing `break` from inside of a block, @@ -696,13 +713,13 @@ Let's think by looking at the (dynamic) call graph when invoking an iterator. -
+```TODO-lang
 rb_eval(NODE_ITER)                   .... catch(TAG_BREAK)
     rb_eval(NODE_CALL)               .... catch(TAG_BREAK)
         rb_eval(NODE_YIELD)
             rb_yield_0
                 rb_eval(NODE_BREAK)  .... throw(TAG_BREAK)
-</pre>
+``` Since what pushed the block is `NODE_ITER`, @@ -725,7 +742,7 @@ Then, let's see how this is resolved.

▼ `rb_yield_0()` − the parts relates to tags

-
+```TODO-lang
 3826      PUSH_TAG(PROT_NONE);
 3827      if ((state = EXEC_TAG()) == 0) {
               /* ……evaluate the body…… */
@@ -753,7 +770,7 @@ Then, let's see how this is resolved.
 3859      POP_TAG();
 
 (eval.c)
-</pre>
+``` The parts of `TAG_BREAK` and `TAG_RETURN` are crucial. @@ -775,7 +792,10 @@ Therefore, if `0x10` did not exist, `state` would be the same value as `TAG_xxxx (See also Figure 6). -!images/ch_iterator_dst.jpg(`block->tag->dst`)! +
+ figure 6: `block->tag->dst` +
figure 6: `block->tag->dst`
+
Now, `tag->dst` became the value which differs from `TAG_xxxx` and is unique for each call. @@ -786,7 +806,7 @@ The place where making an effort is this place of `rb_eval:NODE_ITER`:

▼ `rb_eval()` − `NODE_ITER` (to stop jumps)

-
+```TODO-lang
 case NODE_ITER:
   {
       state = EXEC_TAG();
@@ -800,7 +820,7 @@ case NODE_ITER:
           }
       }
   }
-</pre>
+``` In corresponding `NODE_ITER` and `rb_yield_0`, `block` should point to the same thing, @@ -810,7 +830,7 @@ Because of this, only the corresponding `NODE_ITER` can properly stop the jump. -h3. Check of a block +### Check of a block Whether or not a currently being evaluated method is an iterator, @@ -821,7 +841,7 @@ After reading the above all, we can tell its implementation.

▼ `rb_block_given_p()`

-
+```TODO-lang
 3726  int
 3727  rb_block_given_p()
 3728  {
@@ -831,7 +851,7 @@ After reading the above all, we can tell its implementation.
 3732  }
 
 (eval.c)
-</pre>
+``` I think there's no problem. What I'd like to talk about this time is actually @@ -840,7 +860,7 @@ another function to check, it is `rb_f_block_given_p()`.

▼ `rb_f_block_given_p()`

-
+```TODO-lang
 3740  static VALUE
 3741  rb_f_block_given_p()
 3742  {
@@ -850,7 +870,7 @@ another function to check, it is `rb_f_block_given_p()`.
 3746  }
 
 (eval.c)
-</pre>
+``` This is the substance of Ruby's `block_given?`. @@ -869,7 +889,8 @@ Hence, we need to check the previous one. -h2. `Proc` +`Proc` +====== To describe a `Proc` object from the viewpoint of implementing, @@ -879,7 +900,7 @@ but it also means when and where it will be used becomes completely unpredictabl Focusing on how the influence of this fact is, let's look at the implementation. -h3. `Proc` object creation +### `Proc` object creation A `Proc` object is created with `Proc.new`. @@ -888,7 +909,7 @@ Its substance is `proc_new()`.

▼ `proc_new()`

-
+```TODO-lang
 6418  static VALUE
 6419  proc_new(klass)
 6420      VALUE klass;
@@ -934,7 +955,7 @@ Its substance is `proc_new()`.
 6456  }
 
 (eval.c)
-</pre>
+``` The creation of a `Proc` object itself is unexpectedly simple. @@ -988,7 +1009,7 @@ Indeed, it has become a good time. -h3. Floating Frame +### Floating Frame Previously, I mentioned it just in one phrase "duplicate all frames", @@ -1038,7 +1059,7 @@ so let's see it first.

▼ `scope_dup()` only the beginning

-
+```TODO-lang
 6187  static void
 6188  scope_dup(scope)
 6189      struct SCOPE *scope;
@@ -1049,7 +1070,7 @@ so let's see it first.
 6194      scope->flags |= SCOPE_DONT_RECYCLE;
 
 (eval.c)
-</pre>
+``` As you can see, `SCOPE_DONT_RECYCLE` is set. @@ -1058,14 +1079,14 @@ Then next, take a look at the definition of `POP_SCOPE()`:

▼ `POP_SCOPE()` only the beginning

-
+```TODO-lang
  869  #define POP_SCOPE()                                      \
  870      if (ruby_scope->flags & SCOPE_DONT_RECYCLE) {        \
  871         if (_old) scope_dup(_old);                        \
  872      }                                                    \
 
 (eval.c)
-</pre>
+``` When it pops, if `SCOPE_DONT_RECYCLE` flag was set to the current `SCOPE` (`ruby_scope`), @@ -1075,7 +1096,10 @@ In this way, one by one, the flag is propagated at the time when it pops. (Figure 7) -!images/ch_iterator_dst.jpg(flag propagation)! +
+ figure 7: flag propagation +
figure 7: flag propagation
+
Since `VARS` also does not have any `prev` pointer, @@ -1095,14 +1119,14 @@ I've just got the answer. Take a look at the next program: -
+```TODO-lang
 def get_proc
   Proc.new { nil }
 end
 
 env = get_proc { p 'ok' }
 eval("yield", env)
-</pre>
+``` I have not explained this feature, but by passing a `Proc` object as the second @@ -1124,7 +1148,7 @@ the reason in this way.)
)) -h3. Invocation of `Proc` +### Invocation of `Proc` Next, we'll look at the invocation of a created `Proc`. @@ -1137,7 +1161,7 @@ The substance of `Proc#call` is `proc_call()`:

▼ `proc_call()`

-
+```TODO-lang
 6570  static VALUE
 6571  proc_call(proc, args)
 6572      VALUE proc, args;           /* OK */
@@ -1146,7 +1170,7 @@ The substance of `Proc#call` is `proc_call()`:
 6575  }
 
 (eval.c)
-</pre>
+``` Delegate to `proc_invoke()`. When I look up `invoke` in a dictionary, @@ -1159,9 +1183,9 @@ The prototype of the `proc_invoke()` is, -
+```TODO-lang
 proc_invoke(VALUE proc, VALUE args, int pcall, VALUE self)
-</pre>
+``` However, according to the previous code, `pcall=Qtrue` and `self=Qundef` in this case, @@ -1170,7 +1194,7 @@ so these two can be removed by constant foldings.

▼ `proc_invoke` (simplified)

-
+```TODO-lang
 static VALUE
 proc_invoke(proc, args, /* pcall=Qtrue */, /* self=Qundef */)
     VALUE proc, args;
@@ -1236,7 +1260,7 @@ proc_invoke(proc, args, /* pcall=Qtrue */, /* self=Qundef */)
     }
     return result;
 }
-</pre>
+``` @@ -1269,7 +1293,7 @@ you can determine it has finished. -h3. Block and `Proc` +### Block and `Proc` In the previous chapter, various things about arguments and parameters of @@ -1278,10 +1302,10 @@ Although it is brief, here I'll perform the final part of that series. -
+```TODO-lang
 def m(&block)
 end
-</pre>
+``` This is a "block parameter". The way to enable this is very simple. @@ -1303,9 +1327,9 @@ Next, it is the side to pass a block. -
+```TODO-lang
 m(&block)
-</pre>
+``` This is a "block argument". This is also simple, take a `BLOCK` from (a `Proc` object stored in) `block` and push it. diff --git a/load.textile b/load.md similarity index 95% rename from load.textile rename to load.md index 69caeed..b314f31 100644 --- a/load.textile +++ b/load.md @@ -6,17 +6,18 @@ Translated by Vincent ISAMBART h1(#chapter). Chapter 18: Loading -h2. Outline +Outline +======= -h3. Interface +### Interface At the Ruby level, there are two procedures that can be used for loading: `require` and `load`. -
+```TODO-lang
 require 'uri'            # load the uri library
 load '/home/foo/.myrc'   # read a resource file
-</pre>
+``` They are both normal methods, compiled and evaluated exactly like any other code. It means loading occurs after compilation gave control to @@ -26,7 +27,7 @@ These two function each have their own use. 'require' is to load libraries, and `load` is to load an arbitrary file. Let's see this in more details. -h4. `require` +#### `require` `require` has four features: @@ -39,7 +40,7 @@ Ruby's load path is in the global variable `$:`, which contains an array of strings. For example, displaying the content of the `$:` in the environment I usually use would show: -
+```TODO-lang
 % ruby -e 'puts $:'
 /usr/lib/ruby/site_ruby/1.7
 /usr/lib/ruby/site_ruby/1.7/i686-linux
@@ -47,7 +48,7 @@ the environment I usually use would show:
 /usr/lib/ruby/1.7
 /usr/lib/ruby/1.7/i686-linux
 .
-</pre>
+``` Calling `puts` on an array displays one element on each line so it's easy to read. @@ -60,9 +61,9 @@ In a Windows environment, there will also be a drive letter. Then, let's try to `require` the standard library `nkf.so` from the load path. -
+```TODO-lang
 require 'nkf'
-</pre>
+``` If the `require`d name has no extension, `require` silently compensates. First, it tries with `.rb`, then with `.so`. On some @@ -73,7 +74,7 @@ extension libraries, for example `.dll` in a Windows environment or Let's do a simulation on my environment. `ruby` checks the following paths in sequential order. -
+```TODO-lang
 /usr/lib/ruby/site_ruby/1.7/nkf.rb
 /usr/lib/ruby/site_ruby/1.7/nkf.so
 /usr/lib/ruby/site_ruby/1.7/i686-linux/nkf.rb
@@ -84,7 +85,7 @@ paths in sequential order.
 /usr/lib/ruby/1.7/nkf.so
 /usr/lib/ruby/1.7/i686-linux/nkf.rb
 /usr/lib/ruby/1.7/i686-linux/nkf.so    found!
-
+``` `nkf.so` has been found in `/usr/lib/ruby/1.7/i686-linux`. Once the file has been found, `require`'s last feature (not loading the file @@ -93,13 +94,13 @@ global variable `$"`. In our case the string `"nkf.so"` has been put there. Even if the extension has been omitted when calling `require`, the file name in `$"` has the extension. -
+```TODO-lang
 require 'nkf'   # after loading nkf...
 p $"            # ["nkf.so"]  the file is locked
 
 require 'nkf'   # nothing happens if we require it again
 p $"            # ["nkf.so"]  the content of the lock array does not change
-
+``` There are two reasons for adding the missing extension. The first one is not to load it twice if the same file is later `require`d with its @@ -114,22 +115,22 @@ By the way, `$"` can be freely modified even at the Ruby level so we cannot say it's a strong lock. You can for example load an extension library multiple times if you clear `$"`. -h4. `load` +#### `load` `load` is a lot easier than `require`. Like `require`, it searches the file in `$:`. But it can only load Ruby programs. Furthermore, the extension cannot be omitted: the complete file name must always be given. -
+```TODO-lang
 load 'uri.rb'   # load the URI library that is part of the standard library
-
+``` In this simple example we try to load a library, but the proper way to use `load` is for example to load a resource file giving its full path. -h3. Flow of the whole process +### Flow of the whole process If we roughly split it, "loading a file" can be split in: @@ -145,7 +146,7 @@ programs are basically evaluated at the top-level. It means the defined constants will be top-level constants and the defined methods will be function-style methods. -
+```TODO-lang
 ### mylib.rb
 MY_OBJECT = Object.new
 def my_p(obj)
@@ -155,7 +156,7 @@ end
 ### first.rb
 require 'mylib'
 my_p MY_OBJECT   # we can use the constants and methods defined in another file
-
+``` Only the local variable scope of the top-level changes when the file changes. In other words, local variables cannot be shared between @@ -169,14 +170,14 @@ the `module` statement, it does not serve any purpose, as everything that is at the top-level of the loaded file is put at the Ruby top-level. -
+```TODO-lang
 require 'mylib'     # whatever the place you require from, be it at the top-level
 module SandBox
   require 'mylib'   # or in a module, the result is the same
 end
-
+``` -h3. Highlights of this chapter +### Highlights of this chapter With the above knowledge in our mind, we are going to read. But because this time its specification is defined very particularly, @@ -200,9 +201,10 @@ currently popular trend of execution time loading, more commonly referred to as plug-ins, works. This is the most interesting part of this chapter, so I'd like to use as many pages as possible to talk about it. -h2. Searching the library +Searching the library +===================== -h3. `rb_f_require()` +### `rb_f_require()` The body of `require` is `rb_f_require`. First, we will only look at the part concerning the file search. Having many different cases is @@ -210,7 +212,7 @@ bothersome so we will limit ourselves to the case when no file extension is given. ▼ `rb_f_require()` (simplified version) -
+```TODO-lang
 5527  VALUE
 5528  rb_f_require(obj, fname)
 5529      VALUE obj, fname;
@@ -262,7 +264,7 @@ extension is given.
 5497  };
 
 (eval.c)
-
+``` In this function the `goto` labels `load_rb` and `load_dyna` are actually like subroutines, and the two variables `feature` and `fname` @@ -286,7 +288,7 @@ that it takes as a second parameter a list of extensions Below we will first look entirely at the file searching code, then we will look at the code of the `require` lock in `load_rb`. -h3. `rb_find_file()` +### `rb_find_file()` First the file search continues in `rb_find_file()`. This function searches the file `path` in the global load path `$:` @@ -294,7 +296,7 @@ searches the file `path` in the global load path `$:` only look at the main part. ▼ `rb_find_file()` (simplified version) -
+```TODO-lang
 2494  VALUE
 2495  rb_find_file(path)
 2496      VALUE path;
@@ -332,11 +334,11 @@ only look at the main part.
 2565  }
 
 (file.c)
-
+``` If we write what happens in Ruby we get the following: -
+```TODO-lang
 tmp = []                     # make an array
 $:.each do |path|            # repeat on each element of the load path
   tmp.push path if path.length > 0 # check the path and push it
@@ -344,7 +346,7 @@ end
 lpath = tmp.join(PATH_SEP)   # concatenate all elements in one string separated by PATH_SEP
 
 dln_find_file(f, lpath)      # main processing
-
+``` `PATH_SEP` is the `path separator`: `':'` under UNIX, `';'` under Windows. `rb_ary_join()` creates a string by putting it between the @@ -364,14 +366,14 @@ objects as parameters or read `ruby` global variables. but in fact this is already done in the omitted part of `rb_find_file()`. So in `ruby`'s case it's not necessary. -h3. Loading wait +### Loading wait Here, file search is finished quickly. Then comes the loading code. Or more accurately, it is "up to just before the load". The code of `rb_f_require()`'s `load_rb` has been put below. ▼ `rb_f_require():load_rb` -
+```TODO-lang
 5625    load_rb:
 5626      if (rb_feature_p(RSTRING(feature)->ptr, Qtrue))
 5627          return Qfalse;
@@ -390,7 +392,7 @@ of `rb_f_require()`'s `load_rb` has been put below.
 5645      ruby_safe_level = safe;
 
 (eval.c)
-
+``` Like mentioned above, `rb_feature_p()` checks if a lock has been put in `$"`. And `rb_provide_feature()` pushes a string in `$"`, in other @@ -402,13 +404,13 @@ from one thread, and if during the loading another thread tries to load the same file, that thread will wait for the first loading to be finished. If it were not the case: -
+```TODO-lang
 Thread.fork {
     require 'foo'   # At the beginning of require, foo.rb is added to $"
 }                   # However the thread changes during the evaluation of foo.rb
 require 'foo'   # foo.rb is already in $" so the function returns immediately
 # (A) the classes of foo are used...
-
+``` By doing something like this, even though the `foo` library is not really loaded, the code at (A) ends up being executed. @@ -426,7 +428,7 @@ thread. That makes an exclusive lock. And in `rb_feature_p()`, we wait for the loading thread to end like the following. ▼ `rb_feature_p()` (second half) -
+```TODO-lang
 5477  rb_thread_t th;
 5478
 5479  while (st_lookup(loading_tbl, f, &th)) {
@@ -438,7 +440,7 @@ wait for the loading thread to end like the following.
 5485  }
 
 (eval.c)
-
+``` When `rb_thread_schedule()` is called, the control is transferred to an other thread, and this function only returns after the control @@ -451,15 +453,16 @@ can end. The `curr_thread` check is not to lock itself (figure 1). Figure 1: Serialisation of loads -h2. Loading of Ruby programs +Loading of Ruby programs +======================== -h3. `rb_load()` +### `rb_load()` We will now look at the loading process itself. Let's start by the part inside `rb_f_require()`'s `load_rb` loading Ruby programs. ▼ `rb_f_require()-load_rb-` loading -
+```TODO-lang
 5638      PUSH_TAG(PROT_NONE);
 5639      if ((state = EXEC_TAG()) == 0) {
 5640          rb_load(fname, 0);
@@ -467,7 +470,7 @@ part inside `rb_f_require()`'s `load_rb` loading Ruby programs.
 5642      POP_TAG();
 
 (eval.c)
-
+``` The `rb_load()` which is called here is actually the "meat" of the Ruby-level `load`. @@ -479,7 +482,7 @@ And the second argument `wrap` is folded with 0 because it is 0 in the above calling code. ▼ `rb_load()` (simplified edition) -
+```TODO-lang
 void
 rb_load(fname, /* wrap=0 */)
     VALUE fname;
@@ -536,7 +539,7 @@ rb_load(fname, /* wrap=0 */)
     if (!NIL_P(ruby_errinfo))   /* an exception was raised during the loading */
         rb_exc_raise(ruby_errinfo);
 }
-
+``` Just after we thought we've been through the storm of stack manipulations we entered again. Although this is tough, @@ -567,7 +570,7 @@ Or, it's possible it indicates `eval.c`. -h3. `rb_load_file()` +### `rb_load_file()` Then, all of a sudden, the source file is `ruby.c` here. Or to put it more accurately, @@ -580,7 +583,7 @@ all of them would be put in `eval.c` in the first place. Then, it is `rb_load_file()`. ▼ `rb_load_file()` -
+```TODO-lang
  865  void
  866  rb_load_file(fname)
  867      char *fname;
@@ -589,7 +592,7 @@ Then, it is `rb_load_file()`.
  870  }
 
 (ruby.c)
-
+``` Delegated entirely. The second argument `script` of `load_file()` is a boolean value and it indicates whether it is loading the file of the argument of the @@ -600,7 +603,7 @@ non essential things have already been removed.

▼ `load_file()` (simplified edition)

-
+```TODO-lang
 static void
 load_file(fname, /* script=0 */)
     char *fname;
@@ -617,7 +620,7 @@ load_file(fname, /* script=0 */)
     rb_compile_file(fname, f, 1);       (C)
     rb_io_close(f);
 }
-
+``` (A) The call to `fopen()` is to check if the file can be opened. If there is no problem, it's immediately closed. @@ -640,7 +643,7 @@ result. That's all for the loading code. Finally, the calls were quite deep so the callgraph of `rb_f_require()` is shown below. -
+```TODO-lang
 rb_f_require           ....eval.c
     rb_find_file            ....file.c
         dln_find_file           ....dln.c
@@ -650,14 +653,14 @@ rb_f_require           ....eval.c
             load_file
                 rb_compile_file     ....parse.y
         eval_node
-
+``` You must bring callgraphs on a long trip. It's common knowledge. -h4. The number of `open` required for loading +#### The number of `open` required for loading Previously, there was `open` used just to check if a file can be opened, but in fact, during the loading process of `ruby`, additionally other functions @@ -674,7 +677,7 @@ If you're using Windows, probably your IDE will have a tracer built in. Well, as The output is done on `stderr` so it was redirected using `2>&1`. -
+```TODO-lang
 % strace ruby -e 'require "rational"' 2>&1 | grep '^open'
 open("/etc/ld.so.preload", O_RDONLY)    = -1 ENOENT
 open("/etc/ld.so.cache", O_RDONLY)      = 3
@@ -686,22 +689,23 @@ open("/usr/lib/ruby/1.7/rational.rb", O_RDONLY|O_LARGEFILE) = 3
 open("/usr/lib/ruby/1.7/rational.rb", O_RDONLY|O_LARGEFILE) = 3
 open("/usr/lib/ruby/1.7/rational.rb", O_RDONLY|O_LARGEFILE) = 3
 open("/usr/lib/ruby/1.7/rational.rb", O_RDONLY|O_LARGEFILE) = 3
-
+``` Until the `open` of `libc.so.6`, it is the `open` used in the implementation of dynamic links, and there are the other four `open`s. Thus it seems the three of them are useless. -h2. Loading of extension libraries +Loading of extension libraries +============================== -h3. `rb_f_require()`-`load_dyna` +### `rb_f_require()`-`load_dyna` This time we will see the loading of extension libraries. We will start with `rb_f_require()`'s `load_dyna`. However, we do not need the part about locking anymore so it was removed. ▼ `rb_f_require()`-`load_dyna` -
+```TODO-lang
 5607  {
 5608      int volatile old_vmode = scope_vmode;
 5609
@@ -719,7 +723,7 @@ part about locking anymore so it was removed.
 5621  if (state) JUMP_TAG(state);
 
 (eval.c)
-
+``` By now, there is very little here which is novel. The tags are used only in the way of the idiom, @@ -727,7 +731,7 @@ and to save/restore the visibility scope is done in the way we get used to see. All that remains is `dln_load()`. What on earth is that for? For the answer, continue to the next section. -h3. Brush up about links +### Brush up about links `dln_load()` is loading an extension library, but what does loading an extension library mean? @@ -739,29 +743,29 @@ Since I'm using `gcc` on Linux, I can create a runnable program in the following manner. -
+```TODO-lang
 % gcc hello.c
-
+``` According to the file name, this is probably a "Hello, World!" program. In UNIX, `gcc` outputs a program into a file named `a.out` by default, so you can subsequently execute it in the following way: -
+```TODO-lang
 % ./a.out
 Hello, World!
-
+``` It is created properly. By the way, what is `gcc` actually doing here? Usually we just say "compile" or "compile", but actually -# preprocess (`cpp`) -# compile C into assembly (`cc`) -# assemble the assembly language into machine code (`as`) -# link (`ld`) +* preprocess (`cpp`) +* compile C into assembly (`cc`) +* assemble the assembly language into machine code (`as`) +* link (`ld`) there are these four steps. Among them, preprocessing and compiling and assembling are described in a lot of places, but the description often ends @@ -840,7 +844,7 @@ I recommend to read these books. -h3. Linking that is truly dynamic +### Linking that is truly dynamic And finally we get into our main topic. The "dynamic" in "dynamic linking" naturally means it "occurs at execution time", but what people usually refer to as "dynamic linking" is pretty much decided already at compile time. For example, the names of the needed functions, and which library they can be found in, are already known. For instance, if you need `cos()`, you know it's in `libm`, so you use `gcc -lm`. If you didn't specify the correct library at compile time, you'd get a link error. @@ -856,7 +860,7 @@ is usually called "dynamic load". -h3. Dynamic load API +### Dynamic load API I've finished to explain the concept. The rest is how to do that dynamic loading. This is not a difficult thing. Usually there's a specific API prepared in the @@ -883,22 +887,22 @@ Where dynamic loading APIs are totally different each other, the only saving is the usage pattern of API is completely the same. Whichever platform you are on, -# map the library to the address space of the process -# take the pointers to the functions contained in the library -# unmap the library +* map the library to the address space of the process +* take the pointers to the functions contained in the library +* unmap the library it consists of these three steps. 
For example, if it is `dlopen`-based API, -# `dlopen` -# `dlsym` -# `dlclose` +* `dlopen` +* `dlsym` +* `dlclose` are the correspondences. If it is Win32 API, -# `LoadLibrary` (or `LoadLibraryEx`) -# `GetProcAddress` -# `FreeLibrary` +* `LoadLibrary` (or `LoadLibraryEx`) +* `GetProcAddress` +* `FreeLibrary` are the correspondences. @@ -915,7 +919,7 @@ and it means loading and linking and finally calling `Init_xxxx()` if it is an extension library. -h3. `dln_load()` +### `dln_load()` Finally, we've reached the content of `dln_load()`. `dln_load()` is also a long function, @@ -923,7 +927,7 @@ but its structure is simple because of some reasons. Take a look at the outline first. ▼ `dln_load()` (outline) -
+```TODO-lang
 void*
 dln_load(file)
     const char *file;
@@ -942,7 +946,7 @@ dln_load(file)
 #endif
     return 0;                   /* dummy return */
 }
-
+``` This way, the part connecting to the main is completely separated based on each platform. When thinking, we only have to think about one platform at a time. @@ -959,12 +963,12 @@ Supported APIs are as follows: * `load` (a bit old AIX) -h3. `dln_load()`-`dlopen()` +### `dln_load()`-`dlopen()` First, let's start with the API code for the `dlopen` series. ▼ `dln_load()`-`dlopen()` -
+```TODO-lang
 1254  void*
 1255  dln_load(file)
 1256      const char *file;
@@ -1012,7 +1016,7 @@ First, let's start with the API code for the `dlopen` series.
 1580  }
 
 (dln.c)
-
+``` (A) the `RTLD_LAZY` as the argument of `dlopen()` indicates "resolving the @@ -1037,14 +1041,14 @@ Thus, we can't call `dlclose()` until the process will be finished. -h3. `dln_load()` -- Win32 +### `dln_load()` -- Win32 As for Win32, `LoadLibrary()` and `GetProcAddress()` are used. It is very general Win32 API which also appears on MSDN. ▼ `dln_load()`-Win32 -
+```TODO-lang
 1254  void*
 1255  dln_load(file)
 1256      const char *file;
@@ -1081,7 +1085,7 @@ It is very general Win32 API which also appears on MSDN.
 1580  }
 
 (dln.c)
-
+``` Doing `LoadLibrary()` then `GetProcAddress()`. The pattern is so equivalent that nothing is left to say, diff --git a/method.textile b/method.md similarity index 96% rename from method.textile rename to method.md index e476d39..45838cd 100644 --- a/method.textile +++ b/method.md @@ -3,15 +3,17 @@ layout: default title: "Chapter 15: Methods" --- -h1. Chapter 15: Methods +Chapter 15: Methods +------------------- In this chapter, I'll talk about method searching and invoking. -h2. Searching methods +Searching methods +================= -h3. Terminology +### Terminology In this chapter, both method calls and method definitions are discussed, @@ -20,7 +22,7 @@ confusing, let's strictly define terms here: -
+```TODO-lang
 m(a)          # a is a "normal argument"
 m(*list)      # list is an "array argument"
 m(&block)     # block is a "block argument"
@@ -29,7 +31,7 @@ def m(a)      # a is a "normal parameter"
 def m(a=nil)  # a is an "option parameter", nil is "its default value".
 def m(*rest)  # rest is a "rest parameter"
 def m(&block) # block is a "block parameter"
-
+``` In short, they are all "arguments" when passing and "parameters" when receiving, @@ -42,19 +44,19 @@ parameters" will be discussed in the next chapter. -h3. Investigation +### Investigation

▼The Source Program

-
+```TODO-lang
 obj.method(7,8)
-
+```

▼Its Syntax Tree

-
+```TODO-lang
 NODE_CALL
 nd_mid = 9049 (method)
 nd_recv:
@@ -69,7 +71,7 @@ nd_args:
         NODE_LIT
         nd_lit = 8:Fixnum
     ]
-
+``` The node for a method call is `NODE_CALL`. @@ -90,7 +92,7 @@ Now, let's look at the handler of `NODE_CALL` in `rb_eval()`.

▼ `rb_eval()` − `NODE_CALL`

-
+```TODO-lang
 2745  case NODE_CALL:
 2746    {
 2747        VALUE recv;
@@ -108,7 +110,7 @@ Now, let's look at the handler of `NODE_CALL` in `rb_eval()`.
 2759    break;
 
 (eval.c)
-
+``` The problems are probably the three macros, `BEGIN_CALLARGS SETUP_ARGS() END_CALLARGS`. @@ -122,7 +124,7 @@ Here, let's investigate only about `SETUP_ARGS()`. -h3. `SETUP_ARGS()` +### `SETUP_ARGS()` `SETUP_ARGS()` is the macro to evaluate the arguments of a method. @@ -134,12 +136,12 @@ Therefore, something like the following is a boilerplate: -
+```TODO-lang
 int argc; VALUE *argv;   /* used in SETUP_ARGS */
 TMP_PROTECT;
 
 SETUP_ARGS(args_node);
-
+``` `args_node` is (the node represents) the arguments of the method, @@ -150,7 +152,7 @@ Let's look at it:

▼ `SETUP_ARGS()`

-
+```TODO-lang
 1780  #define SETUP_ARGS(anode) do {\
 1781      NODE *n = anode;\
 1782      if (!n) {\                             no arguments
@@ -184,7 +186,7 @@ Let's look at it:
 1810  } while (0)
 
 (eval.c)
-
+``` This is a bit long, but since it clearly branches in three ways, not so terrible @@ -205,7 +207,7 @@ If I write in the code (and tidy up a little), it becomes as follows. -
+```TODO-lang
 /***** else if clause、argc!=0 *****/
 int i;
 n = anode;
@@ -222,7 +224,7 @@ if (TYPE(args) != T_ARRAY)
 argc = RARRAY(args)->len;
 argv = ALLOCA_N(VALUE, argc);                   /* 1 */
 MEMCPY(argv, RARRAY(args)->ptr, VALUE, argc);   /* 3 */
-
+``` `TMP_ALLOC()` is used in the `else if` side, @@ -236,7 +238,10 @@ The point is that "in the `else` side the values of arguments are also stored in `args`". If I illustrate, it would look like Figure 1. -!images/ch_method_anchor.jpg(Being in the heap is all right.)! +
+ figure 1: Being in the heap is all right. +
figure 1: Being in the heap is all right.
+
If at least one `VALUE` is on the stack, others can be successively marked through @@ -250,7 +255,7 @@ For your information, "anchor `VALUE`" is the word just coined now. -h3. `rb_call()` +### `rb_call()` `SETUP_ARGS()` is relatively off the track. Let's go back to the main line. The @@ -261,7 +266,7 @@ of them.

▼ `rb_call()` (simplified)

-
+```TODO-lang
 static VALUE
 rb_call(klass, recv, mid, argc, argv, scope)
     VALUE klass, recv;
@@ -294,7 +299,7 @@ rb_call(klass, recv, mid, argc, argv, scope)
     return rb_call0(klass, recv, mid, id,
                     argc, argv, body, noex & NOEX_UNDEF);
 }
-
+``` The basic way of searching methods was discussed in chapter 2: "Object". @@ -314,9 +319,9 @@ What is looking up the cache is the first half of `rb_call()`. Only with -
+```TODO-lang
 ent = cache + EXPR1(klass, mid);
-
+``` this line, the cache is searched. @@ -328,12 +333,15 @@ tree step-by-step and caches the result at the same time. Figure 2 shows the entire flow of searching. -!images/ch_method_msearch.jpg(Method Search)! +
+ figure 2: Method Search +
figure 2: Method Search
+
-h3. Method Cache +### Method Cache Next, let's examine the structure of the method cache in detail. @@ -341,7 +349,7 @@ Next, let's examine the structure of the method cache in detail.

▼Method Cache

-
+```TODO-lang
  180  #define CACHE_SIZE 0x800
  181  #define CACHE_MASK 0x7ff
  182  #define EXPR1(c,m) ((((c)>>3)^(m))&CACHE_MASK)
@@ -358,7 +366,7 @@ Next, let's examine the structure of the method cache in detail.
  193  static struct cache_entry cache[CACHE_SIZE];
 
 (eval.c)
-
+``` If I describe the mechanism shortly, it is a hash table. I mentioned that the @@ -375,7 +383,10 @@ It is `EXPR1()`. Among its arguments, `c` is the class object and `m` is the method name (`ID`). (Figure 3) -!images/ch_method_mhash.jpg(Method Cache)! +
+ figure 3: Method Cache +
figure 3: Method Cache
+
 However, `EXPR1()` is not a perfect hash function or anything, so a different @@ -385,7 +396,7 @@ It just slows its performance down a little. -h4. The effect of Method Cache +#### The effect of Method Cache By the way, how effective is the method cache in actuality? @@ -406,9 +417,10 @@ This is awesome. Apparently, the effect of "it is known as ..." is outstanding. -h2. Invocation +Invocation +========== -h3. `rb_call0()` +### `rb_call0()` There have been many things and finally we arrived at the method invoking. @@ -419,7 +431,7 @@ look at it by dividing into small portions. Starting with the outline:

▼ `rb_call0()` (Outline)

-
+```TODO-lang
 4482  static VALUE
 4483  rb_call0(klass, recv, id, oid, argc, argv, body, nosuper)
 4484      VALUE klass, recv;
@@ -473,7 +485,7 @@ look at it by dividing into small portions. Starting with the outline:
 4706  }
 
 (eval.c)
-
+``` First, an `ITER` is pushed and whether or not the method is an iterator is @@ -504,12 +516,12 @@ could be ignored. The important things are only `NODE_CFUNC`, `NODE_SCOPE` and -h3. `PUSH_FRAME()` +### `PUSH_FRAME()`

▼ `PUSH_FRAME() POP_FRAME()`

-
+```TODO-lang
  536  #define PUSH_FRAME() do {               \
  537      struct FRAME _frame;                \
  538      _frame.prev = ruby_frame;           \
@@ -528,7 +540,7 @@ h3. `PUSH_FRAME()`
  551  } while (0)
 
 (eval.c)
-
+``` First, we'd like to make sure the entire `FRAME` is allocated on the stack. @@ -542,7 +554,7 @@ method of the `FRAME`. `FRAME_ALLOCA` obviously indicates "it is on the stack". -h3. `rb_call0()` - `NODE_CFUNC` +### `rb_call0()` - `NODE_CFUNC` A lot of things are written in this part of the original code, @@ -552,11 +564,11 @@ following line:

▼ `rb_call0()` − `NODE_CFUNC` (simplified)

-
+```TODO-lang
 case NODE_CFUNC:
   result = call_cfunc(body->nd_cfnc, recv, len, argc, argv);
   break;
-
+``` Then, as for `call_cfunc()` ... @@ -564,7 +576,7 @@ Then, as for `call_cfunc()` ...

▼ `call_cfunc()` (simplified)

-
+```TODO-lang
 4394  static VALUE
 4395  call_cfunc(func, recv, len, argc, argv)
 4396      VALUE (*func)();
@@ -603,7 +615,7 @@ Then, as for `call_cfunc()` ...
 4480  }
 
 (eval.c)
-
+``` As shown above, it branches based on the argument count. @@ -618,7 +630,7 @@ And in some places, say, `rb_svar` (`eval.c`), it is actually done. -h3. `rb_call0()` - `NODE_SCOPE` +### `rb_call0()` - `NODE_SCOPE` `NODE_SCOPE` is to invoke a method defined in Ruby. @@ -627,7 +639,7 @@ This part forms the foundation of Ruby.

▼ `rb_call0()` − `NODE_SCOPE` (outline)

-
+```TODO-lang
 4568  case NODE_SCOPE:
 4569    {
 4570        int state;
@@ -700,7 +712,7 @@ This part forms the foundation of Ruby.
 4697    break;
 
 (eval.c)
-
+``` (A) `CREF` forwarding, which was described at the section of constants in the @@ -727,7 +739,7 @@ the method is completely invoked. -h3. Set Parameters +### Set Parameters Then, we'll examine the totally skipped part, which sets parameters. @@ -735,7 +747,7 @@ But before that, I'd like you to first check the syntax tree of the method again -
+```TODO-lang
 % ruby -rnodedump -e 'def m(a) nil end'
 NODE_SCOPE
 nd_rval = (null)
@@ -756,7 +768,7 @@ nd_next:
             nd_next:
                 NODE_NIL
         nd_next = (null)
-
+``` `NODE_ARGS` is the node to specify the parameters of a method. @@ -781,21 +793,21 @@ For example, if you write a definition as below, -
+```TODO-lang
 def m(a, b, c = nil, *rest)
   lvar1 = nil
 end
-
+``` local variable IDs are assigned as follows. -
+```TODO-lang
 0   1   2   3   4   5      6
 $_  $~  a   b   c   rest   lvar1
-
+``` Are you still with me? @@ -804,7 +816,7 @@ Taking this into considerations, let's look at the code.

▼ `rb_call0()` − `NODE_SCOPE` −assignments of arguments

-
+```TODO-lang
 4601  if (nd_type(body) == NODE_ARGS) { /* no body */
 4602      node = body;           /* NODE_ARGS */
 4603      body = 0;              /* the method body */
@@ -874,7 +886,7 @@ Taking this into considerations, let's look at the code.
 4664  }
 
 (eval.c)
-
+``` Since comments are added more than before, @@ -892,9 +904,9 @@ It means the following form: -
+```TODO-lang
 super
-
+``` This `super` has a behavior to directly pass the parameters of the currently executing method. @@ -907,7 +919,7 @@ If there's not, the one after option parameters are assigned seems better. -
+```TODO-lang
 def m(a, b, *rest)
   super     # probably 5, 6, 7, 8 should be passed
 end
@@ -917,7 +929,7 @@ def m(a, b = 6)
   super     # probably 5, 6 should be passed
 end
 m(5)
-
+``` @@ -934,7 +946,7 @@ The rest is, as the ending of this chapter, looking at the implementation of -h3. `super` +### `super` What corresponds to `super` are `NODE_SUPER` and `NODE_ZSUPER`. @@ -944,7 +956,7 @@ and `NODE_ZSUPER` is `super` without arguments.

▼ `rb_eval()` − `NODE_SUPER`

-
+```TODO-lang
 2780        case NODE_SUPER:
 2781        case NODE_ZSUPER:
 2782          {
@@ -985,7 +997,7 @@ and `NODE_ZSUPER` is `super` without arguments.
 2813          break;
 
 (eval.c)
-
+``` For `super` without arguments, I said that `ruby_frame->argv` is directly used @@ -1032,11 +1044,11 @@ What happens if `String.new` is replaced by new definition and `super` is called -
+```TODO-lang
 def String.new
   super
 end
-
+``` As a consequence, an object whose struct is of `T_OBJECT` but whose class is `String` is created. However, a method of `String` is written with expectation of a struct of `T_STRING`, diff --git a/minimum.textile b/minimum.md similarity index 88% rename from minimum.textile rename to minimum.md index d641661..456a818 100644 --- a/minimum.textile +++ b/minimum.md @@ -6,7 +6,8 @@ Translated by Sebastian Krause h1(#chapter). Chapter 1: Introduction -h2. A Minimal Introduction to Ruby +A Minimal Introduction to Ruby +============================== Here the Ruby prerequisites are explained, which one needs to know in order to understand the first section. @@ -21,36 +22,37 @@ and such I'll show only the most widely used notations. On principle I won't omit things even if I can. This way the syntax becomes more simple. I won't always say "We can omit this". -h2. Objects +Objects +======= -h3. Strings +### Strings Everything that can be manipulated in a Ruby program is an object. There are no primitives as Java's `int` and `long`. For instance if we write as below it denotes a string object with content `content`. -
+```TODO-lang
 "content"
-
+``` I casually called it a string object but to be precise this is an expression which generates a string object. Therefore if we write it several times each time another string object is generated. -
+```TODO-lang
 "content"
 "content"
 "content"
-
+``` Here three string objects with content `content` are generated. By the way, objects just existing there can't be seen by programmers. Let's show how to print them on the terminal. -
+```TODO-lang
 p("content")   # Shows "content"
-
+``` Everything after an `#` is a comment. From now on, I'll put the result of an expression in a comment behind. @@ -62,13 +64,13 @@ Precisely speaking, there are no functions in Ruby, but just for now we can think of it as a function. You can use functions wherever you are. -h3. Various Literals +### Various Literals Now, let's explain some more the expressions which directly generate objects, the so-called literals. First the integers and floating point numbers. -
+```TODO-lang
 # Integer
 1
 2
@@ -79,97 +81,98 @@ First the integers and floating point numbers.
 1.0
 99.999
 1.3e4     # 1.3×10^4
-
+``` Don't forget that these are all expressions which generate objects. I'm repeating myself but there are no primitives in Ruby. Below an array object is generated. -
+```TODO-lang
 [1, 2, 3]
-
+``` This program generates an array which consists of the three integers 1, 2 and 3 in that order. As the elements of an array can be arbitrary objects the following is also possible. -
+```TODO-lang
 [1, "string", 2, ["nested", "array"]]
-
+``` And finally, a hash table is generated by the expression below. -
+```TODO-lang
 {"key"=>"value", "key2"=>"value2", "key3"=>"value3"}
-
+``` A hash table is a structure which expresses one-to-one relationships between arbitrary objects. The above line creates a table which stores the following relationships. -
+```TODO-lang
 "key"   →  "value"
 "key2"  →  "value2"
 "key3"  →  "value3"
-
+``` If we ask a hash table created in this way "What's corresponding to `key`?", it'll answer "That's `value`." How can we ask? We use methods. -h3. Method Calls +### Method Calls We can call methods on an object. In C++ Jargon they are member functions. I don't think it's necessary to explain what a method is. I'll just explain the notation. -
+```TODO-lang
 "content".upcase()
-
+``` Here the `upcase` method is called on a string object ( with content `content`). As `upcase` is a method which returns a new string with the small letters replaced by capital letters, we get the following result. -
+```TODO-lang
 p("content".upcase())   # Shows "CONTENT"
-
+``` Method calls can be chained. -
+```TODO-lang
 "content".upcase().downcase()
-
+``` Here the method `downcase` is called on the return value of `"content".upcase()`. There are no public fields (member variables) as in Java or C++. The object interface consists of methods only. -h2. The Program +The Program +=========== -h3. Top Level +### Top Level In Ruby we can just write expressions and it becomes a program. One doesn't need to define a `main()` as in C++ or Java. -
+```TODO-lang
 p("content")
-
+``` This is a complete Ruby program. If we put this into a file called `first.rb` we can execute it from the command line as follows. -
+```TODO-lang
 % ruby first.rb
 "content"
-
+``` With the `-e` option of the `ruby` program we don't even need to create a file. -
+```TODO-lang
 % ruby -e 'p("content")'
 "content"
-
+``` By the way, the place where `p` is written is the lowest nesting level of the program, it means the highest level from the program's standpoint, @@ -179,22 +182,22 @@ Having top-level is a characteristic trait of Ruby as a scripting language. In Ruby, one line is usually one statement. A semicolon at the end isn't necessary. Therefore the program below is interpreted as three statements. -
+```TODO-lang
 p("content")
 p("content".upcase())
 p("CONTENT".downcase())
-
+``` When we execute it it looks like this. -
+```TODO-lang
 % ruby second.rb
 "content"
 "CONTENT"
 "content"
-
+``` -h3. Local Variables +### Local Variables In Ruby all variables and constants store references to objects. That's why one can't copy the content by assigning one variable to another variable. @@ -206,21 +209,21 @@ by the beginning of the name. Local variables start with a small letter or an underscore. One can write assignments by using "`=`". -
+```TODO-lang
 str = "content"
 arr = [1,2,3]
-
+``` An initial assignment serves as declaration, an explicit declaration is not necessary. Because variables don't have types, we can assign any kind of objects indiscriminately. The program below is completely legal. -
+```TODO-lang
 lvar = "content"
 lvar = [1,2,3]
 lvar = 1
-
+``` But even if we can, we don't have to do it. If different kind of objects are put in one variable, it tends to become difficult to read. In a @@ -229,42 +232,45 @@ The above was just an example for the sake of it. Variable reference has also a pretty sensible notation. -
+```TODO-lang
 str = "content"
 p(str)           # Shows "content"
-
+``` In addition let's check the point that a variable holds a reference by taking an example. -
+```TODO-lang
 a = "content"
 b = a
 c = b
-
+``` After we execute this program all three local variables `a b c` point to the same object, a string object with content `"content"` created on the first line (Figure 1). -!images/ch_minimum_reference.png(Ruby variables store references to objects)! +
+ figure 1: Ruby variables store references to objects +
figure 1: Ruby variables store references to objects
+
By the way, as these variables are called local, they should be local to somewhere, but we cannot talk about this scope without reading a bit further. Let's say for now that the top level is one local scope. -h3. Constants +### Constants Constants start with a capital letter. They can only be assigned once (at their creation). -
+```TODO-lang
 Const = "content"
 PI = 3.1415926535
 
 p(Const)   # Shows "content"
-
+``` I'd like to say that if we assign twice an error occurs. But there is just a warning, not an error. @@ -276,10 +282,10 @@ Therefore, it is allowed due to practical requirements and there's no other choi but essentially there should be an error. In fact, up until version 1.1 there really was an error. -
+```TODO-lang
 C = 1
 C = 2   # There is a warning but ideally there should be an error.
-
+``` A lot of people are fooled by the word constant. A constant only does not switch objects once it is assigned. @@ -290,18 +296,21 @@ might capture the concept better than "constant". By the way, to indicate that an object itself shouldn't be changed another means is used: `freeze`. -!images/ch_minimum_const.jpg(constant means read only)! +
+ figure 2: constant means read only +
figure 2: constant means read only
+
 And the scope of constants cannot be described yet either. It will be discussed later in the next section mixing with classes. -h3. Control Structures +### Control Structures Since Ruby has a wide abundance of control structures, just lining them up can be a huge task. For now, I just mention that there are `if` and `while`. -
+```TODO-lang
 if i < 10 then
   # body
 end
@@ -309,7 +318,7 @@ end
 while i < 10 do
   # body
 end
-
+``` In a conditional expression, only the two objects, `false` and `nil`, are false and all @@ -318,9 +327,10 @@ other various objects are true. 0 or the empty string are also true of course. It wouldn't be wise if there were just `false`, there is also `true`. And it is of course true. -h2. Classes and Methods +Classes and Methods +=================== -h3. Classes +### Classes In object oriented system, essentially methods belong to objects. It can hold only in a ideal world, though. @@ -341,7 +351,7 @@ And on this `String` class the methods `upcase`, `downcase`, `strip` and many others are defined. So it looks as if each string object can respond to all these methods. -
+```TODO-lang
 # They all belong to the String class,
 # hence the same methods are defined
        "content".upcase()
@@ -351,17 +361,17 @@ methods.
        "content".length()
 "This is a pen.".length()
     "chapter II".length()
-
+``` By the way, what happens if the called method isn't defined? In a static language a compiler error occurs but in Ruby there is a runtime exception. Let's try it out. For this kind of programs the `-e` option is handy. -
+```TODO-lang
 % ruby -e '"str".bad_method()'
 -e:1: undefined method `bad_method' for "str":String (NoMethodError)
-
+``` When the method isn't found there's apparently a `NoMethodError`. @@ -373,25 +383,25 @@ By the way, if we write `String.upcase` it has a completely different meaning in the Ruby world. What could that be? I explain it in the next paragraph. -h3. Class Definition +### Class Definition Up to now we talked about already defined classes. We can of course also define our own classes. To define classes we use the `class` statement. -
+```TODO-lang
 class C
 end
-
+``` This is the definition of a new class `C`. After we defined it we can use it as follows. -
+```TODO-lang
 class C
 end
 c = C.new()   # create an instance of C and assign it to the variable c
-
+``` Note that the notation for creating a new instance is not `new C`. The astute reader might think: @@ -413,26 +423,26 @@ on this object ( usually new). If we look at the example below, it's pretty obvious that the creation of an instance doesn't differ from a normal method call. -
+```TODO-lang
 S = "content"
 class C
 end
 
 S.upcase()  # Get the object the constant S points to and call upcase
 C.new()     # Get the object the constant C points to and call new
-
+``` So `new` is not a reserved word in Ruby. And we can also use `p` for an instance of a class even immediately after its creation. -
+```TODO-lang
 class C
 end
 
 c = C.new()
 p(c)       # #
-
+``` It won't display as nicely as a string or an integer but it shows its respective class and it's internal ID. This ID is the pointer value @@ -443,28 +453,28 @@ Oh, I completely forgot to mention about the notation of method names: So `Object#new` and `Object.new` are completely different things, we have to separate them strictly. -
+```TODO-lang
 obj = Object.new()   # Object.new
 obj.new()            # Object#new
-
+``` In practice a method `Object#new` is almost never defined so the second line will return an error. Please regard this as an example of the notation. -h3. Method Definition +### Method Definition Even if we can define classes, it is useless if we cannot define methods. Let's define a method for our class `C`. -
+```TODO-lang
 class C
   def myupcase( str )
     return str.upcase()
   end
 end
-
+``` To define a method we use the `def` statement. In this example we defined the method `myupcase`. The name of the only parameter is `str`. @@ -474,27 +484,27 @@ And we can use any number of parameters. Let's use the defined method. Methods are usually called from the outside by default. -
+```TODO-lang
 c = C.new()
 result = c.myupcase("content")
 p(result)   # Shows "CONTENT"
-
+``` Of course if you get used to it you don't need to assign every time. The line below gives the same result. -
+```TODO-lang
 p(C.new().myupcase("content"))   # Also shows "CONTENT"
-
+``` -h3. `self` +### `self` During the execution of a method the information about who is itself (the instance on which the method was called) is always saved and can be picked up in `self`. Like the `this` in C++ or Java. Let's check this out. -
+```TODO-lang
 class C
   def get_self()
     return self
@@ -504,7 +514,7 @@ end
 c = C.new()
 p(c)              # #
 p(c.get_self())   # #
-
+``` As we see, the above two expressions return the exact same object. We could confirm that `self` is `c` during the method call on `c`. @@ -512,7 +522,7 @@ We could confirm that `self` is `c` during the method call on `c`. Then what is the way to call a method on itself? What first comes to mind is calling via `self`. -
+```TODO-lang
 class C
   def my_p( obj )
     self.real_my_p(obj)   # called a method against oneself
@@ -524,13 +534,13 @@ class C
 end
 
 C.new().my_p(1)   # Output 1
-
+``` But always adding the `self` when calling an own method is tedious. Hence, it is designed so that one can omit the called method (the receiver) whenever one calls a method on `self`. -
+```TODO-lang
 class C
   def my_p( obj )
     real_my_p(obj)   # You can call without specifying the receiver
@@ -542,9 +552,9 @@ class C
 end
 
 C.new().my_p(1)   # Output 1
-
+``` -h3. Instance Variables +### Instance Variables As there are a saying "Objects are data and code", just being able to define methods alone would be not so useful. @@ -556,7 +566,7 @@ In the fashion of Ruby's variable naming convention, the variable type can be determined by the first a few characters. For instance variables it's an `@`. -
+```TODO-lang
 class C
   def set_i(value)
     @i = value
@@ -570,16 +580,16 @@ end
 c = C.new()
 c.set_i("ok")
 p(c.get_i())   # Shows "ok"
-
+``` Instance variables differ a bit from the variables seen before: We can reference them without assigning (defining) them. To see what happens we add the following lines to the code above. -
+```TODO-lang
 c = C.new()
 p(c.get_i())   # Shows nil
-
+``` Calling `get` without `set` gives `nil`. `nil` is the object which indicates "nothing". @@ -588,11 +598,11 @@ but that's just the way it is. We can use `nil` like a literal as well. -
+```TODO-lang
 p(nil)   # Shows nil
-
+``` -h3. `initialize` +### `initialize` As we saw before, when we call 'new' on a freshly defined class, we can create an instance. That's sure, but @@ -601,7 +611,7 @@ In this case we don't change the `new` method, we define the `initialize` method. When we do this, it gets called within `new`. -
+```TODO-lang
 class C
   def initialize()
     @i = "ok"
@@ -612,18 +622,21 @@ class C
 end
 c = C.new()
 p(c.get_i())   # Shows "ok"
-
+``` Strictly speaking this is the specification of the `new` method but not the specification of the language itself. -h3. Inheritance +### Inheritance Classes can inherit from other classes. For instance `String` inherits from `Object`. In this book, we'll indicate this relation by a vertical arrow as in Fig.3. -!images/ch_minimum_supersub.jpg(Inheritance)! +
+ figure 3: Inheritance +
figure 3: Inheritance
+
In the case of this illustration, the inherited class (`Object`) is called superclass or superior class. The inheriting class (`String`) is called @@ -634,10 +647,10 @@ Anyway let's try it out. Let our created class inherit from another class. To inherit from another class ( or designate a superclass) write the following. -
+```TODO-lang
 class C < SuperClassName
 end
-
+``` When we leave out the superclass like in the cases before the class `Object` becomes tacitly the superclass. @@ -647,7 +660,7 @@ Handing over means that the methods which were defined in the superclass also work in the subclass as if they were defined in there once more. Let's check it out. -
+```TODO-lang
 class C
   def hello()
     return "hello"
@@ -659,22 +672,22 @@ end
 
 sub = Sub.new()
 p(sub.hello())   # Shows "hello"
-
+``` `hello` was defined in the class `C` but we could call it on an instance of the class `Sub` as well. Of course we don't need to assign variables. The above is the same as the line below. -
+```TODO-lang
 p(Sub.new().hello())
-
+``` By defining a method with the same name, we can overwrite the method. In C++ and Object Pascal (Delphi) it's only possible to overwrite functions explicitly defined with the keyword `virtual` but in Ruby every method can be overwritten unconditionally. -
+```TODO-lang
 class C
   def hello()
     return "Hello"
@@ -689,7 +702,7 @@ end
 
 p(Sub.new().hello())   # Shows "Hello from Sub"
 p(C.new().hello())     # Shows "Hello"
-
+``` We can inherit over several steps. For instance as in Fig.4 `Fixnum` inherits every method from `Object`, `Numeric` and `Integer`. @@ -697,7 +710,10 @@ When there are methods with the same name the nearer classes take preference. As type overloading isn't there at all the requisites are extremely straightforward. -!images/ch_minimum_multiinherit.jpg(Inheritance over multiple steps)! +
+ figure 4: Inheritance over multiple steps +
figure 4: Inheritance over multiple steps
+
In C++ it's possible to create a class which inherits nothing. While in Ruby one has to inherit from the `Object` class either @@ -706,13 +722,16 @@ relations it becomes a single tree with `Object` at the top. For example, when we draw a tree of the inheritance relations among the important classes of the basic library, it would look like Fig.5. -!images/ch_minimum_classtree.jpg(Ruby's class tree)! +
+ figure 5: Ruby's class tree +
figure 5: Ruby's class tree
+
Once the superclass is appointed ( in the definition statement ) it's impossible to change it. In other words, one can add a new class to the class tree but cannot change a position or delete a class. -h3. Inheritance of Variables……? +### Inheritance of Variables……? In Ruby (instance) variables aren't inherited. Even though trying to inherit, @@ -724,7 +743,7 @@ become defined. Then, since the namespace of instance variables is completely flat based on each instance, it can be accessed by a method of whichever class. -
+```TODO-lang
 class A
   def initialize()   # called from when processing new()
     @i = "ok"
@@ -738,7 +757,7 @@ class B < A
 end
 
 B.new().print_i()   # Shows "ok"
-
+``` If you can't agree with this behavior, let's forget about classes and inheritance. When there's an instance `obj` of @@ -747,9 +766,12 @@ defined in `C`. Of course we keep the overwrite rule in mind. Then the methods of `C` get attached to the instance `obj` (Fig.6). This strong palpability is a specialty of Ruby's object orientation. -!images/ch_minimum_objimage.jpg(A conception of a Ruby object)! +
+ figure 6: A conception of a Ruby object +
figure 6: A conception of a Ruby object
+
-h3. Modules +### Modules Only a single superclass can be designated. So Ruby looks like single inheritance. But because of modules it has in practice @@ -760,27 +782,27 @@ In short, modules are classes for which a superclass cannot be designated and instances cannot be created. For the definition we write as follows. -
+```TODO-lang
 module M
 end
-
+``` Here the module `M` was defined. Methods are defined exactly the same way as for classes. -
+```TODO-lang
 module M
   def myupcase( str )
     return str.upcase()
   end
 end
-
+``` But because we cannot create instances, we cannot call them directly. To do that, we use the module by "including" it into other classes. Then we become to be able to deal with it as if a class inherited the module. -
+```TODO-lang
 module M
   def myupcase( str )
     return str.upcase()
@@ -792,7 +814,7 @@ class C
 end
 
 p(C.new().myupcase("content"))  # "CONTENT" is shown
-
+``` Even though no method was defined in the class `C` we can call the method `myupcase`. @@ -803,14 +825,14 @@ There's no limit on defining methods or accessing instance variables. I said we cannot specify any superclass of a module, but other modules can be included. -
+```TODO-lang
 module M
 end
 
 module M2
   include M
 end
-
+``` In other words it's functionally the same as appointing a superclass. But a class cannot come above a module. Only modules are allowed @@ -818,7 +840,7 @@ above modules. The example below also contains the inheritance of methods. -
+```TODO-lang
 module OneMore
   def method_OneMore()
     p("OneMore")
@@ -839,17 +861,20 @@ end
 
 C.new().method_M()         # Output "M"
 C.new().method_OneMore()   # Output "OneMore"
-
+``` As with classes when we sketch inheritance it looks like Fig.7 -!images/ch_minimum_modinherit.jpg(multilevel inclusion)! +
+ figure 7: multilevel inclusion +
figure 7: multilevel inclusion
+
Besides, the class `C` also has a superclass. How is its relationship to modules? For instance, let's think of the following case. -
+```TODO-lang
 # modcls.rb
 
 class Cls
@@ -869,32 +894,39 @@ class C < Cls
 end
 
 p(C.new().test())   # "class"? "module"?
-
+``` `C` inherits from `Cls` and includes `Mod`. Which will be shown in this case, `"class"` or `"module"`? In other words, which one is "closer", class or module? We'd better ask Ruby about Ruby, thus let's execute it: -
+```TODO-lang
 % ruby modcls.rb
 "module"
-
+``` Apparently a module takes preference before the superclass. In general, in Ruby when a module is included, it would be inherited by going in between the class and the superclass. As a picture it might look like Fig.8. -!images/ch_minimum_modclass.jpg(The relation between modules and classes)! +
+ figure 8: The relation between modules and classes +
figure 8: The relation between modules and classes
+
And if we also taking the modules included in the module into accounts, it would look like Fig.9. -!images/ch_minimum_modclass2.jpg(The relation between modules and classes(2))! +
+ figure 9: The relation between modules and classes(2) +
figure 9: The relation between modules and classes(2)
+
-h2. The Program revisited +The Program revisited +===================== Caution. This section is extremely important and explaining the elements which are not easy to mix with for programmers who have only used static languages before. @@ -902,26 +934,26 @@ For other parts just skimming is sufficient, but for only this part I'd like you to read it carefully. The explanation will also be relatively attentive. -h3. Nesting of Constants +### Nesting of Constants First a repetition of constants. As a constant begins with a capital letter the definition goes as follows. -
+```TODO-lang
 Const = 3
-
+``` Now we reference the constant in this way. -
+```TODO-lang
 p(Const)   # Shows 3
-
+``` Actually we can also write this. -
+```TODO-lang
 p(::Const)   # Shows 3 in the same way.
-
+``` The `::` in front shows that it's a constant defined at the top level. You can think of the path in a filesystem. Assume there is a file `vmunix` @@ -935,14 +967,14 @@ However mentioning both is cumbersome, so I'll just subsume them under class definition. When one enters a class definition the level for constants rises ( as if entering a directory). -
+```TODO-lang
 class SomeClass
   Const = 3
 end
 
 p(::SomeClass::Const)   # Shows 3
 p(  SomeClass::Const)   # The same. Shows 3
-
+``` `SomeClass` is defined at toplevel. Hence one can reference it by writing either `SomeClass` or `::SomeClass`. @@ -953,26 +985,26 @@ As we can create a directory in a directory, we can create a class inside a class. For instance like this: -
+```TODO-lang
 class C        # ::C
   class C2     # ::C::C2
     class C3   # ::C::C2::C3
     end
   end
 end
-
+``` By the way, for a constant defined in a class definition statement, should we always write its full name? Of course not. As with the filesystem, if one is inside the same class definition one can skip the `::`. It becomes like that: -
+```TODO-lang
 class SomeClass
   Const = 3
   p(Const)   # Shows 3.
 end
-
+``` "What?" you might think. Surprisingly, even if it is in a class definition statement, @@ -984,7 +1016,7 @@ Let's add that we can of course also view a constant inside a method. The reference rules are the same as within the class definition (outside the method). -
+```TODO-lang
 class C
   Const = "ok"
   def test()
@@ -993,9 +1025,9 @@ class C
 end
 
 C.new().test()   # Shows "ok"
-
+``` -h3. Everything is executed +### Everything is executed Looking at the big picture I want to write one more thing. In Ruby almost the whole parts of program is "executed". @@ -1005,7 +1037,7 @@ and almost all the rest is executed in the apparent order. Look for instance at the following code. I used various constructions which have been used before. -
+```TODO-lang
  1:  p("first")
  2:
  3:  class C < Object
@@ -1019,7 +1051,7 @@ I used various constructions which have been used before.
 11:  end
 12:
 13:  p(C.new().myupcase("content"))
-
+``` This program is executed in the following order: @@ -1033,7 +1065,7 @@ This program is executed in the following order: |`9: return str.upcase()`|Returns `"CONTENT"`| |`13: p(...)`|Shows `"CONTENT"`| -h3. The Scope of Local Variables +### The Scope of Local Variables At last we can talk about the scope of local variables. @@ -1042,7 +1074,7 @@ have each completely independent local variable scope. In other words, the `lvar` variables in the following program are all different variables, and they do not influence each other. -
+```TODO-lang
 lvar = 'toplevel'
 
 class C
@@ -1059,9 +1091,9 @@ module M
 end
 
 p(lvar)   # Shows "toplevel"
-
+``` -h3. `self` as context +### `self` as context Previously, I said that during method execution oneself (an object on which the method was called) becomes self. @@ -1081,7 +1113,10 @@ the module `Kernel` is included. In there the function-flavor methods like `p` and `puts` are defined (Fig.10). That's why one can call `puts` and `p` also at the toplevel. -!images/ch_minimum_Kernel.jpg(`main`, `Object` and `Kernel`)! +
+ figure 10: `main`, `Object` and `Kernel` +
figure 10: `main`, `Object` and `Kernel`
+
Thus `p` isn't a function, it's a method. Just because it is defined in `Kernel` and thus can be called like a function as "its own" @@ -1098,22 +1133,22 @@ Well, since `self` is setup everywhere, The `self` in the class definition is the class itself (the class object). Hence it would look like this. -
+```TODO-lang
 class C
   p(self)   # C
 end
-
+``` What should this be good for? In fact, we've already seen an example in which it is very useful. This one. -
+```TODO-lang
 module M
 end
 class C
   include M
 end
-
+``` This `include` is actually a method call to the class object `C`. I haven't mentioned it yet but the parentheses around arguments @@ -1121,14 +1156,14 @@ can be omitted for method calls. And I omitted the parentheses around `include` such that it doesn't look like a method call because we have not finished the talk about class definition statement. -h3. Loading +### Loading In Ruby the loading of libraries also happens at runtime. Normally one writes this. -
+```TODO-lang
 require("library_name")
-
+``` The impression isn't false, `require` is a method. It's not even a reserved word. When it is written this way, @@ -1138,10 +1173,10 @@ As there is no concept like Java packages in Ruby, when we'd like to separate namespaces, it is done by putting files into a directory. -
+```TODO-lang
 require("somelib/file1")
 require("somelib/file2")
-
+``` And in the library usually classes and such are defined with `class` statements or `module` statements. The constant scope of the top level is flat without the @@ -1149,7 +1184,7 @@ distinction of files, so one can see classes defined in another file without any special preparation. To partition the namespace of class names one has to explicitly nest modules as shown below. -
+```TODO-lang
 # example of the namespace partition of net library
 module Net
   class SMTP
@@ -1162,11 +1197,12 @@ module Net
     # ...
   end
 end
-
+``` -h2. More about Classes +More about Classes +================== -h3. The talk about Constants still goes on +### The talk about Constants still goes on Up to now we used the filesystem metaphor for the scope of constants, but I want you to completely forget that. @@ -1174,18 +1210,18 @@ the scope of constants, but I want you to completely forget that. There is more about constants. Firstly one can also see constants in the "outer" class. -
+```TODO-lang
 Const = "ok"
 class C
   p(Const)   # Shows "ok"
 end
-
+``` The reason why this is designed in this way is because this becomes useful when modules are used as namespaces. Let's explain this by adding a few things to the previous example of `net` library. -
+```TODO-lang
 module Net
   class SMTP
     # Uses Net::SMTPHelper in the methods
@@ -1193,7 +1229,7 @@ module Net
   class SMTPHelper   # Supports the class Net::SMTP
   end
 end
-
+``` In such case, it's convenient if we can refer to it also from the `SMTP` class just by writing `SMTPHelper`, isn't it? @@ -1203,7 +1239,7 @@ The outer class can be referenced no matter how many times it is nesting. When the same name is defined on different levels, the one which will first be found from within will be referred to. -
+```TODO-lang
 Const = "far"
 class C
   Const = "near" # This one is closer than the one above
@@ -1213,20 +1249,20 @@ class C
     end
   end
 end
-
+``` There's another way of searching constants. If the toplevel is reached when going further and further outside then the own superclass is searched for the constant. -
+```TODO-lang
 class A
   Const = "ok"
 end
 class B < A
   p(Const)   # "ok" is shown
 end
-
+``` Really, that's pretty complicated. @@ -1234,7 +1270,7 @@ Let's summarize. When looking up a constant, first the outer classes is searched then the superclasses. This is quite contrived, but let's assume a class hierarchy as follows. -
+```TODO-lang
 class A1
 end
 class A2 < A1
@@ -1254,13 +1290,16 @@ class A3 < A2
     end
   end
 end
-
+``` When the constant `Const` in `C3` is referenced, it's looked up in the order depicted in Fig.11. -!images/ch_minimum_constref.jpg(Search order for constants)! +
+ figure 11: Search order for constants +
figure 11: Search order for constants
+
Be careful about one point. The superclasses of the classes outside, for instance `A1` and `B2`, aren't searched at all. @@ -1268,61 +1307,67 @@ If it's outside once it's always outside and if it's superclass once it's always superclass. Otherwise, the number of classes searched would become too big and the behavior of such complicated thing would become unpredictable. -h3. Metaclasses +### Metaclasses I said that a method can be called on if it is an object. I also said that the methods that can be called are determined by the class of an object. Then shouldn't there be a class for class objects? (Fig.12) -!images/ch_minimum_classclass.jpg(A class of classes?)! +
+ figure 12: A class of classes? +
figure 12: A class of classes?
+
In this kind of situation, in Ruby, we can check in practice. It's because there's "a method which returns the class (class object) to which an object itself belongs", `Object#class`. -
+```TODO-lang
 p("string".class())   # String is shown
 p(String.class())     # Class is shown
 p(Object.class())     # Class is shown
-
+``` Apparently `String` belongs to the class named `Class`. Then what's the class of `Class`? -
+```TODO-lang
 p(Class.class())      # Class is shown
-
+``` Again `Class`. In other words, whatever object it is, by following like `.class().class().class()` ..., it would reach `Class` in the end, then it will stall in the loop (Fig.13). -!images/ch_minimum_ccc.jpg(The class of the class of the class...)! +
+ figure 13: The class of the class of the class... +
figure 13: The class of the class of the class...
+
`Class` is the class of classes. And what has a recursive structure as "X of X" is called a meta-X. Hence `Class` is a metaclass. -h3. Metaobjects +### Metaobjects Let's change the target and think about modules. As modules are also objects, there also should be a class for them. Let's see. -
+```TODO-lang
 module M
 end
 p(M.class())   # Module is shown
-
+``` The class of a module seems to be `Module`. And what should be the class of the class `Module`? -
+```TODO-lang
 p(Module.class())   # Class
-
+``` It's again `Class` @@ -1330,17 +1375,20 @@ Now we change the direction and examine the inheritance relationships. What's the superclass of `Class` and `Module`? In Ruby, we can find it out with `Class#superclass`. -
+```TODO-lang
 p(Class.superclass())    # Module
 p(Module.superclass())   # Object
 p(Object.superclass())   # nil
-
+``` So `Class` is a subclass of `Module`. Based on these facts, Figure 14 shows the relationships between the important classes of Ruby. -!images/ch_minimum_metaobjects.jpg(The class relationship between the important Ruby classes)! +
+ figure 14: The class relationship between the important Ruby classes +
figure 14: The class relationship between the important Ruby classes
+
Up to now we used `new` and `include` without any explanation, but finally I can explain their true form. `new` is really a method defined for the class `Class`. @@ -1355,7 +1403,7 @@ foundation of Ruby. We can say that these three objects describe the Ruby's object world itself. Namely they are objects which describe objects. Hence, `Object Module Class` are Ruby's "meta-objects". -h3. Singleton Methods +### Singleton Methods I said that methods can be called if it is an object. I also said that the methods that can be called are determined by the object's class. @@ -1367,13 +1415,13 @@ Actually In Ruby there's also a means to define methods for individual objects ( not depending on the class. To do this, you can write this way. -
+```TODO-lang
 obj = Object.new()
 def obj.my_first()
   puts("My first singleton method")
 end
 obj.my_first()   # Shows My first singleton method
-
+``` As you already know `Object` is the root for every class. It's very unlikely that a method whose name is so weird like `my_first` is @@ -1395,9 +1443,9 @@ deletes a file entry from the filesystem. In Ruby it can be used directly as the singleton method `unlink` of the `File` class. Let's try it out. -
+```TODO-lang
 File.unlink("core")  # deletes the coredump
-
+``` It's cumbersome to say "the singleton method `unlink` of the object `File`". We simply write `File.unlink`. Don't mix @@ -1410,14 +1458,14 @@ for the method `write` defined in `File`. |`File.unlink`|the `File`class itself|`File.unlink("core")`| |`File#write`|an instance of `File`|`f.write("str")`| -h3. Class Variables +### Class Variables Class variables were added to Ruby from 1.6 on, they are a relatively new mechanism. As with constants, they belong to a class, and they can be referenced and assigned from both the class and its instances. Let's look at an example. The beginning of the name is `@@`. -
+```TODO-lang
 class C
   @@cvar = "ok"
   p(@@cvar)      # "ok" is shown
@@ -1428,21 +1476,21 @@ class C
 end
 
 C.new().print_cvar()  # "ok" is shown
-
+``` As the first assignment serves as the definition, a reference before an assignment like the one shown below leads to a runtime error. There is an ´@´ in front but the behavior differs completely from instance variables. -
+```TODO-lang
 % ruby -e '
 class C
   @@cvar
 end
 '
 -e:3: uninitialized class variable @@cvar in C (NameError)
-
+``` Here I was a bit lazy and used the -e option. The program is the three lines between the single quotes. @@ -1451,7 +1499,7 @@ Class variables are inherited. Or saying it differently, a variable in a superior class can be assigned and referenced in the inferior class. -
+```TODO-lang
 class A
   @@cvar = "ok"
 end
@@ -1464,17 +1512,18 @@ class B < A
 end
 
 B.new().print_cvar()   # Shows "ok"
-
+``` -h2. Global Variables +Global Variables +================ At last there are also global variables. They can be referenced from everywhere and assigned everywhere. The first letter of the name is a `$`. -
+```TODO-lang
 $gvar = "global variable"
 p($gvar)   # Shows "global variable"
-
+``` As with instance variables, all kinds of names can be considered defined for global variables before assignments. diff --git a/module.textile b/module.md similarity index 96% rename from module.textile rename to module.md index 6946e17..b22b111 100644 --- a/module.textile +++ b/module.md @@ -16,10 +16,11 @@ behaviors of the variable definitions and the variable references. -h2. The Ruby stack +The Ruby stack +============== -h3. Context and Stack +### Context and Stack With an image of a typical procedural language, each time calling a procedure, @@ -101,7 +102,7 @@ even for just one method call. -h3. @ruby_frame@ +### @ruby_frame@ @ruby_frame@ is a stack to record method calls. The stack frame struct is @@ -112,7 +113,7 @@ general noun and @FRAME@ when it means @struct FRAME@.

▼ @ruby_frame@

-
+```TODO-lang
   16  extern struct FRAME {
   17      VALUE self;          /* self */
   18      int argc;            /* the argument count */
@@ -132,7 +133,7 @@ general noun and @FRAME@ when it means @struct FRAME@.
   34  #define FRAME_MALLOC 1   /* FRAME is allocated by malloc */
 
 (env.h)
-
+``` First af all, since there's the @prev@ member, you can infer that the stack is @@ -178,13 +179,13 @@ For instance, -
+```TODO-lang
 class C
   def orig() end
   alias ali orig
 end
 C.new.ali
-
+``` in this case, @last_func=ali@ and @orig_func=orig@. @@ -193,7 +194,7 @@ Not surprisingly, these members also have to do with @super@. -h3. @ruby_scope@ +### @ruby_scope@ @ruby_scope@ is the stack to represent the local variable scope. The method and @@ -205,7 +206,7 @@ I'll call this frame @SCOPE@.

▼ @ruby_scope@

-
+```TODO-lang
   36  extern struct SCOPE {
   37      struct RBasic super;
   38      ID *local_tbl;        /* an array of the local variable names */
@@ -219,7 +220,7 @@ I'll call this frame @SCOPE@.
   46  #define SCOPE_DONT_RECYCLE 4    /* Proc is created with this SCOPE */
 
 (env.h)
-
+``` Since the first element is @struct RBasic@, this is a Ruby object. This is in @@ -228,7 +229,7 @@ like this: -
+```TODO-lang
 def make_counter
   lvar = 0
   return Proc.new { lvar += 1 }
@@ -239,7 +240,7 @@ p cnt.call    # 1
 p cnt.call    # 2
 p cnt.call    # 3
 cnt = nil  # cut the reference. The created Proc finally becomes unnecessary here.
-
+``` The @Proc@ object created by this method will persist longer than the method that @@ -258,7 +259,7 @@ variable space is unnecessary. -h3. @ruby_block@ +### @ruby_block@ @struct BLOCK@ is the real body of a Ruby's iterator block or a @Proc@ object, @@ -269,7 +270,7 @@ This frame will also be briefly written as @BLOCK@ as in the same manner as

▼ @ruby_block@

-
+```TODO-lang
  580  static struct BLOCK *ruby_block;
 
  559  struct BLOCK {
@@ -301,7 +302,7 @@ This frame will also be briefly written as @BLOCK@ as in the same manner as
 
 
 (eval.c)
-
+``` Note that @frame@ is not a pointer. This is because the entire content of @@ -318,7 +319,7 @@ which were created from the one same block have the same @BLOCKTAG@. -h3. @ruby_iter@ +### @ruby_iter@ The stack @ruby_iter@ indicates whether currently calling method is an iterator @@ -328,7 +329,7 @@ But for consistency I'll call it @ITER@.

▼ @ruby_iter@

-
+```TODO-lang
  767  static struct iter *ruby_iter;
 
  763  struct iter {
@@ -340,7 +341,7 @@ But for consistency I'll call it @ITER@.
  770  #define ITER_PRE 1      /* the method which is going to be evaluated next is an iterator */
  771  #define ITER_CUR 2      /* the currently evaluated method is an iterator */
 (eval.c)
-
+``` Although for each method we can determine whether it is an iterator or not, @@ -358,7 +359,7 @@ This will be discussed in detail in Chapter 16: Blocks. -h3. @ruby_dyna_vars@ +### @ruby_dyna_vars@ The block local variable space. The frame struct is @struct RVarmap@ that has @@ -367,7 +368,7 @@ already seen in Part 2. From now on, I'll call it just @VARS@.

▼ @struct RVarmap@

-
+```TODO-lang
   52  struct RVarmap {
   53      struct RBasic super;
   54      ID id;                  /* the name  of the variable */
@@ -376,7 +377,7 @@ already seen in Part 2. From now on, I'll call it just @VARS@.
   57  };
 
 (env.h)
-
+``` Note that a frame is not a single @struct RVarmap@ but a list of the structs (Fig.3). @@ -396,7 +397,7 @@ Fig.3: @ruby_dyna_vars@ -h3. @ruby_class@ +### @ruby_class@ @ruby_class@ represents the current class to which a method is defined. Since @@ -417,7 +418,7 @@ From now on, I'll call this frame @CLASS@. -h3. @ruby_cref@ +### @ruby_cref@ @ruby_cref@ represents the information of the nesting of a class. @@ -427,11 +428,11 @@ Its struct is ...

▼ @ruby_cref@

-
+```TODO-lang
  847  static NODE *ruby_cref = 0;
 
 (eval.c)
-
+``` ... surprisingly @NODE@. This is used just as a "defined struct which can be @@ -451,7 +452,7 @@ explain the actual appearance. -
+```TODO-lang
 class A
   class B
     class C
@@ -459,7 +460,7 @@ class A
     end
   end
 end
-
+``` Fig.4 shows how @ruby_cref@ is when evaluating the code (A). @@ -475,12 +476,12 @@ Therefore, the same state as Fig.4 will be expressed in the following notation: -
+```TODO-lang
 A ← B ← C
-
+``` -h3. @PUSH@ / @POP@ Macros +### @PUSH@ / @POP@ Macros For each stack frame struct, the macros to push and pop are available. For instance, @PUSH_FRAME@ and @POP_FRAME@ for @FRAME@. @@ -488,7 +489,7 @@ Because these will appear in a moment, I'll then explain the usage and content. -h3. The other states +### The other states While they are not so important as the main stacks, the evaluator of @ruby@ has @@ -509,7 +510,8 @@ are not stacks. Actually, most of them are not. -h2. Module Definition +Module Definition +================= The @class@ statement and the @module@ statement and the singleton class @@ -538,20 +540,20 @@ Now, let's start to look at the codes. -h3. Investigation +### Investigation

▼The Source Program

-
+```TODO-lang
 module M
   a = 1
 end
-
+```

▼Its Syntax Tree

-
+```TODO-lang
 NODE_MODULE
 nd_cname = 9621 (M)
 nd_body:
@@ -564,7 +566,7 @@ nd_body:
         nd_value:
             NODE_LIT
             nd_lit = 1:Fixnum
-
+``` @nd_cname@ seems the module name. @cname@ is probably either Const NAME or Class @@ -576,7 +578,7 @@ plays an important role to create a local variable scope. -h3. @NODE_MODULE@ +### @NODE_MODULE@ Let's examine the handler of @NODE_MODULE@ of @rb_eval()@. The parts that are @@ -587,7 +589,7 @@ it has already became unnecessary to show the original code.

▼ @rb_eval()@ − @NODE_MODULE@ (simplified)

-
+```TODO-lang
 case NODE_MODULE:
   {
       VALUE module;
@@ -606,7 +608,7 @@ case NODE_MODULE:
       result = module_setup(module, node->nd_body);
   }
   break;
-
+``` First, we'd like to make sure the module is nested and defined above (the module held by) @ruby_class@. @@ -622,7 +624,7 @@ we can do "additional" definitions on the same one module any number of times. -
+```TODO-lang
 module M
   def a    # M#a is defined
   end
@@ -631,7 +633,7 @@ module M   # add a definition (not re-defining or overwriting)
   def b    # M#b is defined
   end
 end
-
+``` In this program, the two methods, @a@ and @b@, will be defined on the module @M@. @@ -654,7 +656,7 @@ an argument. -h3. @module_setup@ +### @module_setup@ For the module and class and singleton class statements, @module_setup()@ @@ -664,7 +666,7 @@ large amounts.

▼ @module_setup()@

-
+```TODO-lang
 3424  static VALUE
 3425  module_setup(module, n)
 3426      VALUE module;
@@ -726,7 +728,7 @@ large amounts.
 3481  }
 
 (eval.c)
-
+``` This is too big to read all in one gulp. @@ -762,7 +764,7 @@ Consequently, it could be summarized as follows:

▼ @module_setup@ (simplified)

-
+```TODO-lang
 static VALUE
 module_setup(module, node)
     VALUE module;
@@ -791,7 +793,7 @@ module_setup(module, node)
     end
     return result;
 }
-
+``` It does @rb_eval()@ with @node->nd_next@, @@ -811,7 +813,7 @@ Let's investigate them in order. -h3. Creating a local variable scope +### Creating a local variable scope @PUSH_SCOPE@ pushes a local variable space and @PUSH_VARS()@ pushes a block @@ -821,7 +823,7 @@ Let's examine the contents of these macros and what is done.

▼ @PUSH_SCOPE() POP_SCOPE()@

-
+```TODO-lang
  852  #define PUSH_SCOPE() do {               \
  853      volatile int _vmode = scope_vmode;  \
  854      struct SCOPE * volatile _old;       \
@@ -852,7 +854,7 @@ Let's examine the contents of these macros and what is done.
  884  } while (0)
 
 (eval.c)
-
+``` As the same as tags, @SCOPE@ s also create a stack by being synchronized with the @@ -875,7 +877,7 @@ Thus, these will be discussed in Chapter 16: Blocks all at once. -h3. Allocating the local variable space +### Allocating the local variable space As I mentioned many times, the local variable scope is represented by @struct SCOPE@. @@ -887,7 +889,7 @@ following part of @module_setup@ prepares the array.

▼The preparation of the local variable slots

-
+```TODO-lang
 3444  if (node->nd_tbl) {
 3445      VALUE *vars = TMP_ALLOC(node->nd_tbl[0]+1);
 3446      *vars++ = (VALUE)node;
@@ -901,7 +903,7 @@ following part of @module_setup@ prepares the array.
 3454  }
 
 (eval.c)
-
+``` The @TMP_ALLOC()@ at the beginning will be described in the next section. If I @@ -932,7 +934,7 @@ access in @gc.c@.

▼ @rb_gc_mark_children()@ — @T_SCOPE@

-
+```TODO-lang
  815  case T_SCOPE:
  816    if (obj->as.scope.local_vars &&
             (obj->as.scope.flags & SCOPE_MALLOC)) {
@@ -947,7 +949,7 @@ access in @gc.c@.
  825    break;
 
 (gc.c)
-
+``` Apparently, this is a mechanism to protect @node@ from GC. @@ -963,11 +965,11 @@ line of the next line:

▼ @ruby_scope->local_tbl@

-
+```TODO-lang
 3449  ruby_scope->local_tbl = node->nd_tbl;
 
 (eval.c)
-
+``` The local variable name table prepared by the parser is directly used. When is @@ -1003,7 +1005,7 @@ initialized as @nil@. -h3. @TMP_ALLOC@ +### @TMP_ALLOC@ Next, let's read @TMP_ALLOC@ that allocates the local variable space. @@ -1012,12 +1014,12 @@ beginning of @module_setup()@. Its typical usage is this: -
+```TODO-lang
 VALUE *ptr;
 TMP_PROTECT;
 
 ptr = TMP_ALLOC(size);
-
+``` The reason why @TMP_PROTECT@ is in the place for the local variable definitions @@ -1026,7 +1028,7 @@ is that ... Let's see its definition.

▼ @TMP_ALLOC()@

-
+```TODO-lang
 1769  #ifdef C_ALLOCA
 1770  # define TMP_PROTECT NODE * volatile tmp__protect_tmp=0
 1771  # define TMP_ALLOC(n) \
@@ -1039,7 +1041,7 @@ is that ... Let's see its definition.
 1778  #endif
 
 (eval.c)
-
+``` ... it is because it defines a local variable. @@ -1072,7 +1074,7 @@ but because the core of the evaluator is the biggest bottleneck of @ruby@, -h3. Changing the place to define methods on. +### Changing the place to define methods on. The value of the stack @ruby_class@ is the place to define a method on at the @@ -1083,13 +1085,13 @@ Here is the code for it: -
+```TODO-lang
 PUSH_CLASS();
 ruby_class = module;
      :
      :
 POP_CLASS();
-
+``` Why is there the assignment to @ruby_class@ after doing @PUSH_CLASS()@? @@ -1098,7 +1100,7 @@ We can understand it unexpectedly easily by looking at the definition.

▼ @PUSH_CLASS() POP_CLASS()@

-
+```TODO-lang
  841  #define PUSH_CLASS() do { \
  842      VALUE _class = ruby_class
 
@@ -1106,7 +1108,7 @@ We can understand it unexpectedly easily by looking at the definition.
  845  } while (0)
 
 (eval.c)
-
+``` Because @ruby_class@ is not modified even though @PUSH_CLASS@ is done, @@ -1121,7 +1123,7 @@ places we cannot obtain the class before pushing, it is in this way. -h3. Nesting Classes +### Nesting Classes @ruby_cref@ represents the class nesting information at runtime. Therefore, it's naturally predicted that @ruby_cref@ will be pushed on the module statements or @@ -1130,13 +1132,13 @@ In @module_setup()@, it is pushed as follows: -
+```TODO-lang
 PUSH_CREF(module);
 ruby_frame->cbase = (VALUE)ruby_cref;
    :
    :
 POP_CREF();
-
+``` Here, @module@ is the module being defined. @@ -1145,13 +1147,13 @@ Let's also see the definitions of @PUSH_CREF()@ and @POP_CREF()@.

▼ @PUSH_CREF() POP_CREF()@

-
+```TODO-lang
  849  #define PUSH_CREF(c) \
           ruby_cref = rb_node_newnode(NODE_CREF,(c),0,ruby_cref)
  850  #define POP_CREF() ruby_cref = ruby_cref->nd_next
 
 (eval.c)
-
+``` Unlike @PUSH_SCOPE@ or something, there are not any complicated techniques and @@ -1166,7 +1168,7 @@ Details will be discussed in the last section of this chapter. -h3. Replacing frames +### Replacing frames Lastly, let's focus on the manipulation of @ruby_frame@. The first thing is its @@ -1174,9 +1176,9 @@ definition: -
+```TODO-lang
 struct FRAME frame;
-
+``` It is not a pointer. This means that the entire @FRAME@ is allocated on the stack. @@ -1190,14 +1192,14 @@ Then next, let's look at where doing several things with @frame@. -
+```TODO-lang
 frame = *ruby_frame;      /* copy the entire struct */
 frame.tmp = ruby_frame;   /* protect the original FRAME from GC */
 ruby_frame = &frame;      /* replace ruby_frame */
        :
        :
 ruby_frame = frame.tmp;   /* restore */
-
+``` That is, @ruby_frame@ seems temporarily replaced (not pushing). @@ -1218,13 +1220,13 @@ The backtraces are things displayed like followings: -
+```TODO-lang
 % ruby t.rb
 t.rb:11:in `c': some error occured (ArgumentError)
         from t.rb:7:in `b'
         from t.rb:3:in `a'
         from t.rb:14
-
+``` But the module statements and the class statements are not method calls, @@ -1236,27 +1238,28 @@ of "pushed". -h2. The method definition +The method definition +===================== As the next topic of the module definitions, let's look at the method definitions. -h3. Investigation +### Investigation

▼The Source Program

-
+```TODO-lang
 def m(a, b, c)
   nil
 end
-
+```

▼Its Syntax Tree

-
+```TODO-lang
 NODE_DEFN
 nd_mid  = 9617 (m)
 nd_noex = 2 (NOEX_PRIVATE)
@@ -1270,7 +1273,7 @@ nd_defn:
         nd_rest = -1
         nd_opt = (null)
         NODE_NIL
-
+``` I dumped several things and found that there's always @NODE_SCOPE@ in @nd_defn@. @@ -1280,7 +1283,7 @@ the node to store the information to push a local variable scope. -h3. @NODE_DEFN@ +### @NODE_DEFN@ Subsequently, we will examine the corresponding code of @rb_eval()@. This part @@ -1291,7 +1294,7 @@ indirectly call @rb_raise() rb_warn() rb_warning()@.

▼ @rb_eval()@ − @NODE_DEFN@ (simplified)

-
+```TODO-lang
 NODE *defn;
 int noex;
 
@@ -1311,7 +1314,7 @@ else {
 defn = copy_node_scope(node->nd_defn, ruby_cref);
 rb_add_method(ruby_class, node->nd_mid, defn, noex);
 result = Qnil;
-
+``` In the first half, there are the words like @private@ or @protected@, so it is @@ -1344,10 +1347,10 @@ part is the next two lines. -
+```TODO-lang
 defn = copy_node_scope(node->nd_defn, ruby_cref);
 rb_add_method(ruby_class, node->nd_mid, defn, noex);
-
+``` @copy_node_scope()@ is a function to copy (only) @NODE_SCOPE@ attached to the @@ -1361,7 +1364,7 @@ The place to define on is of course @ruby_class@. -h3. @copy_node_scope()@ +### @copy_node_scope()@ @copy_node_scope()@ is called only from the two places: the method definition @@ -1372,7 +1375,7 @@ the usages at these two places are almost the same.

▼ @copy_node_scope()@

-
+```TODO-lang
 1752  static NODE*
 1753  copy_node_scope(node, rval)
 1754      NODE *node;
@@ -1391,7 +1394,7 @@ the usages at these two places are almost the same.
 1767  }
 
 (eval.c)
-
+``` I mentioned that the argument @rval@ is the information of the class nesting @@ -1414,7 +1417,7 @@ information will be used later when referring constants or class variables. -h3. @rb_add_method()@ +### @rb_add_method()@ The next thing is @rb_add_method()@ that is the function to register a method entry. @@ -1422,7 +1425,7 @@ The next thing is @rb_add_method()@ that is the function to register a method en

▼ @rb_add_method()@

-
+```TODO-lang
  237  void
  238  rb_add_method(klass, mid, node, noex)
  239      VALUE klass;
@@ -1444,7 +1447,7 @@ The next thing is @rb_add_method()@ that is the function to register a method en
  254  }
 
 (eval.c)
-
+``` @NEW_METHOD()@ is a macro to create @NODE@. @@ -1458,7 +1461,7 @@ I prepared @nodedump-method@ for this kind of purposes. -
+```TODO-lang
 % ruby -e '
 class C
   def m(a)
@@ -1486,7 +1489,7 @@ nd_body:
 ** unhandled**
 
 
-
+``` There are @NODE_METHOD@ at the top @@ -1513,7 +1516,8 @@ this time. It is used when having to do with @alias@. -h2. Assignment and Reference +Assignment and Reference +======================== Come to think of it, most of the stacks are used to realize a variety of @@ -1523,7 +1527,7 @@ code to reference variables. -h3. Local variable +### Local variable All the necessary information to assign or refer to local variables has appeared, @@ -1542,7 +1546,7 @@ follows:

▼ @rb_eval()@ − @NODE_LVAR@

-
+```TODO-lang
 2975  case NODE_LVAR:
 2976    if (ruby_scope->local_vars == 0) {
 2977        rb_bug("unexpected local variable");
@@ -1551,7 +1555,7 @@ follows:
 2980    break;
 
 (eval.c)
-
+``` It goes without saying but @node->nd_cnt@ is the value that @local_cnt()@ of the @@ -1560,10 +1564,10 @@ parser returns. -h3. Constant +### Constant -h4. Complete Specification +#### Complete Specification In Chapter 6: Variables and constants, @@ -1580,7 +1584,7 @@ Take a look at the following code: -
+```TODO-lang
 class A
   C = 5
   def A.new
@@ -1588,7 +1592,7 @@ class A
     super
   end
 end
-
+``` @A.new@ is a singleton method of @A@, so its class is the singleton class @(A)@. @@ -1613,7 +1617,7 @@ the outer class. -h4. @cbase@ +#### @cbase@ Then, let's look at the code to refer constants including the outer class. @@ -1623,13 +1627,13 @@ The ordinary constant references to which @::@ is not attached, become

▼ @rb_eval()@ − @NODE_CONST@

-
+```TODO-lang
 2994  case NODE_CONST:
 2995    result = ev_const_get(RNODE(ruby_frame->cbase), node->nd_vid, self);
 2996    break;
 
 (eval.c)
-
+``` First, @nd_vid@ appears to be @Variable ID@ and it probably means a constant name. @@ -1669,7 +1673,7 @@ Fig 8. CREF Trasfer -h4. @ev_const_get()@ +#### @ev_const_get()@ Now, let's go back to the code of @NODE_CONST@. Since only @ev_const_get()@ is left, we'll look at it. @@ -1677,7 +1681,7 @@ Since only @ev_const_get()@ is left, we'll look at it.

▼ @ev_const_get()@

-
+```TODO-lang
 1550  static VALUE
 1551  ev_const_get(cref, id, self)
 1552      NODE *cref;
@@ -1701,7 +1705,7 @@ Since only @ev_const_get()@ is left, we'll look at it.
 1569  }
 
 (eval.c)
-
+``` (( According to the errata, the description of @ev_const_get()@ was wrong. @@ -1711,7 +1715,7 @@ Since only @ev_const_get()@ is left, we'll look at it. -h3. Class variable +### Class variable What class variables refer to is also @ruby_cref@. Needless to say, @@ -1728,7 +1732,7 @@ Let's look at it.

▼ @cvar_cbase()@

-
+```TODO-lang
 1571  static VALUE
 1572  cvar_cbase()
 1573  {
@@ -1745,7 +1749,7 @@ Let's look at it.
 1583  }
 
 (eval.c)
-
+``` It traverses @cbase@ up to the class that is not the singleton class, it @@ -1753,7 +1757,7 @@ seems. This feature is added to counter the following kind of code: -
+```TODO-lang
 class C                           class C
   @@cvar = 1                        @@cvar = 1
   class << C                        def C.m
@@ -1765,7 +1769,7 @@ class C                           class C
     end                           end
   end
 end
-
+``` Both the left and right code ends up defining the same method, @@ -1796,7 +1800,7 @@ first place, it is also not related. -h3. Multiple Assignment +### Multiple Assignment If someone asked "where is the most complicated specification of Ruby?", I @@ -1828,14 +1832,14 @@ First, following the standard, let's start with the syntax tree.

▼The Source Program

-
+```TODO-lang
 a, b = 7, 8
-
+```

▼Its Syntax Tree

-
+```TODO-lang
 NODE_MASGN
 nd_head:
     NODE_ARRAY [
@@ -1859,7 +1863,7 @@ nd_value:
             NODE_LIT
             nd_lit = 8:Fixnum
         ]
-
+``` Both the left-hand and right-hand sides are the lists of @NODE_ARRAY@, @@ -1869,13 +1873,13 @@ value EXPAND". We are curious about what this node is doing. Let's see.

▼ @rb_eval()@ − @NODE_REXPAND@

-
+```TODO-lang
 2575  case NODE_REXPAND:
 2576    result = avalue_to_svalue(rb_eval(self, node->nd_head));
 2577    break;
 
 (eval.c)
-
+``` You can ignore @avalue_to_svalue()@. @@ -1886,9 +1890,9 @@ evaluated. This enables even the following code: -
+```TODO-lang
 a, b = b, a    # swap variables in oneline
-
+``` Let's look at @NODE_MASGN@ in the left-hand side. @@ -1896,13 +1900,13 @@ Let's look at @NODE_MASGN@ in the left-hand side.

▼ @rb_eval()@ − @NODE_MASGN@

-
+```TODO-lang
 2923  case NODE_MASGN:
 2924    result = massign(self, node, rb_eval(self, node->nd_value),0);
 2925    break;
 
 (eval.c)
-
+``` Here is only the evaluation of the right-hand side, the rests are delegated to @@ -1911,12 +1915,12 @@ Here is only the evaluation of the right-hand side, the rests are delegated to -h4. @massign()@ +#### @massign()@

▼ @massi@ ……

-
+```TODO-lang
 3917  static VALUE
 3918  massign(self, node, val, pcall)
 3919      VALUE self;
@@ -1926,7 +1930,7 @@ h4. @massign()@
 3923  {
 
 (eval.c)
-
+``` I'm sorry this is halfway, but I'd like you to stop and pay attention to the @@ -1960,7 +1964,7 @@ final appearance is shown as follows:

▼ @massign()@ (simplified)

-
+```TODO-lang
 static VALUE
 massign(self, node, val  /* , pcall=0 */)
     VALUE self;
@@ -2000,7 +2004,7 @@ massign(self, node, val  /* , pcall=0 */)
     }
     return val;
 }
-
+``` @val@ is the right-hand side value. And there's the suspicious conversion called diff --git a/name.textile b/name.md similarity index 92% rename from name.textile rename to name.md index 1d6f616..ed5a966 100644 --- a/name.textile +++ b/name.md @@ -5,14 +5,16 @@ title: Names and Name Table Translated by Clifford Escobar CAOILE -h1. Chapter 3: Names and Name Table +Chapter 3: Names and Name Table +------------------------------- -h2. `st_table` +`st_table` +========== `st_table` has already appeared several times as a method table and an instance table. In this chapter let's look at the structure of the `st_table` in detail. -h3. Summary +### Summary I previously mentioned that the `st_table` is a hash table. What is a hash table? It is a data structure that records one-to-one relations, for example, a @@ -22,13 +24,13 @@ However, data structures other than hash tables can, of course, record one-to-one relations. For example, a list of the following structs will suffice for this purpose. -
+```TODO-lang
 struct entry {
     ID key;
     VALUE val;
     struct entry *next;  /* point to the next entry */
 };
-
+``` However, this method is slow. If the list contains a thousand items, in the worst case, it is necessary to traverse a thousand links. In other words, the @@ -42,12 +44,12 @@ created by Matsumoto, rather: ▼ `st.c` credits -
+```TODO-lang
    1  /* This is a public domain general purpose hash table package
          written by Peter Moore @ UCB. */
 
 (st.c)
-
+``` as shown above. @@ -55,12 +57,15 @@ By the way, when I searched Google and found another version, it mentioned that `st_table` is a contraction of "STring TABLE". However, I find it contradictory that it has both "general purpose" and "string" aspects. -h4. What is a hash table? +#### What is a hash table? A hash table can be thought of as the following: Let us think of an array with `n` items. For example, let us make `n`=64 (figure 1). -!images/ch_name_array.png(Array)! +
+ figure 1: Array +
figure 1: Array
+
Then let us specify a function `f` that takes a key and produces an integer `i` from 0 to `n`-1 (0-63). We call this `f` a hash function. `f` when given the @@ -73,7 +78,10 @@ When recording relationships, given a key, function `f` generates `i`, and places the value into index `i` of the array we have prepared. Index access into an array is very fast. The key concern is changing a key into an integer. -!images/ch_name_aset.png(Array assignment)! +
+ figure 2: Array assignment +
figure 2: Array assignment
+
However, in the real world it isn't that easy. There is a critical problem with this idea. Because `n` is only 64, if there are more than 64 relationships to @@ -86,14 +94,20 @@ many ways to resolve such a collision. One solution is to insert into the next element when a collision occurs. This is called open addressing. (Figure 3). -!images/ch_name_nexti.png(Open addressing)! +
+ figure 3: Open addressing +
figure 3: Open addressing
+
Other than using the array like this, there are other possible approaches, like using a pointer to a respective linked list in each element of the array. Then when a collision occurs, grow the linked list. This is called chaining. (Figure 4) `st_table` uses this chaining method. -!images/ch_name_chain.png(Chaining)! +
+ figure 4: Chaining +
figure 4: Chaining
+
However, if it can be determined a priori what set of keys will be used, it is possible to imagine a hash function that will never create @@ -103,7 +117,7 @@ of arbitrary strings. GNU gperf is one of those. `ruby`'s parser implementation uses GNU gperf but... this is not the time to discuss it. We'll discuss this in the second part of the book. -h3. Data Structure +### Data Structure Let us start looking at the source code. As written in the introductory chapter, if there is data and code, it is better to read the data first. @@ -111,7 +125,7 @@ The following is the data type of `st_table`. ▼ `st_table` -
+```TODO-lang
    9  typedef struct st_table st_table;
 
   16  struct st_table {
@@ -122,11 +136,11 @@ The following is the data type of `st_table`.
   21  };
 
 (st.h)
-
+``` ▼ `struct st_table_entry` -
+```TODO-lang
   16  struct st_table_entry {
   17      unsigned int hash;
   18      char *key;
@@ -135,7 +149,7 @@ The following is the data type of `st_table`.
   21  };
 
 (st.c)
-
+``` `st_table` is the main table structure. `st_table_entry` is a holder that stores one value. `st_table_entry` contains a member called `next` which of @@ -144,32 +158,35 @@ course is used to make `st_table_entry` into a linked list. This is the chain explain this later. First let me explain the other parts so you can compare and understand the roles. -!images/ch_name_sttable.png(`st_table` data structure)! +
+ figure 5: `st_table` data structure +
figure 5: `st_table` data structure
+
So, let us comment on `st_hash_type`. ▼ `struct st_hash_type` -
+```TODO-lang
   11  struct st_hash_type {
   12      int (*compare)();   /* comparison function */
   13      int (*hash)();      /* hash function */
   14  };
 
 (st.h)
-
+``` This is still Chapter 3 so let us examine it attentively. -
+```TODO-lang
 int (*compare)()
-
+``` This part shows, of course, the member `compare` which has a data type of "a pointer to a function that returns an `int`". `hash` is also of the same type. This variable is substituted in the following way: -
+```TODO-lang
 int
 great_function(int n)
 {
@@ -180,14 +197,14 @@ great_function(int n)
 {
     int (*f)();
     f = great_function;
-
+``` And it is called like this: -
+```TODO-lang
     (*f)(7);
 }
-
+``` Here let us return to the `st_hash_type` commentary. Of the two members `hash` and `compare`, `hash` is the hash function `f` explained previously. @@ -215,7 +232,7 @@ to an object and pass it (around), so this mechanism is not necessary. Perhaps it more correct to say that this mechanism is built-in as a language's feature. -h3. `st_hash_type` example +### `st_hash_type` example The usage of a data structure like `st_hash_type` is good as an abstraction. On the other hand, what kind of code it actually passes @@ -227,7 +244,7 @@ integer data type keys. ▼ `st_init_numtable()` -
+```TODO-lang
  182  st_table*
  183  st_init_numtable()
  184  {
@@ -235,7 +252,7 @@ integer data type keys.
  186  }
 
 (st.c)
-
+``` `st_init_table()` is the function that allocates the table memory and so on. `type_numhash` is an `st_hash_type` (it is the member named "type" of `st_table`). @@ -243,7 +260,7 @@ Regarding this `type_numhash`: ▼ `type_numhash` -
+```TODO-lang
   37  static struct st_hash_type type_numhash = {
   38      numcmp,
   39      numhash,
@@ -264,12 +281,12 @@ Regarding this `type_numhash`:
  564  }
 
 (st.c)
-
+``` Very simple. The table that the `ruby` interpreter uses is by and large this `type_numhash`. -h3. `st_lookup()` +### `st_lookup()` Now then, let us look at the function that uses this data structure. First, it's a good idea to look at the function that does the searching. Shown below is the @@ -277,7 +294,7 @@ function that searches the hash table, `st_lookup()`. ▼ `st_lookup()` -
+```TODO-lang
  247  int
  248  st_lookup(table, key, value)
  249      st_table *table;
@@ -300,24 +317,24 @@ function that searches the hash table, `st_lookup()`.
  266  }
 
 (st.c)
-
+``` The important parts are pretty much in `do_hash()` and `FIND_ENTRY()`. Let us look at them in order. ▼ `do_hash()` -
+```TODO-lang
   68  #define do_hash(key,table) (unsigned int)(*(table)->type->hash)((key))
 
 (st.c)
-
+``` Just in case, let us write down the macro body that is difficult to understand: -
+```TODO-lang
 (table)->type->hash
-
+``` is a function pointer where the `key` is passed as a parameter. This is the syntax for calling the function. `*` is not applied to `table`. In other words, @@ -328,7 +345,7 @@ Next, let us examine `FIND_ENTRY()`. ▼ `FIND_ENTRY()` -
+```TODO-lang
  235  #define FIND_ENTRY(table, ptr, hash_val, bin_pos) do {\
  236      bin_pos = hash_val%(table)->num_bins;\
  237      ptr = (table)->bins[bin_pos];\
@@ -348,16 +365,16 @@ Next, let us examine `FIND_ENTRY()`.
           ((x)==(y) || (*table->type->compare)((x),(y)) == 0)
 
 (st.c)
-
+``` `COLLISION` is a debug macro so we will (should) ignore it. The parameters of `FIND_ENTRY()`, starting from the left are: -# `st_table` -# the found entry will be pointed to by this parameter -# hash value -# temporary variable +1. `st_table` +2. the found entry will be pointed to by this parameter +3. hash value +4. temporary variable And, the second parameter will point to the found `st_table_entry*`. @@ -369,14 +386,14 @@ end. Also, there is no semicolon added after the `while(0)`. -
+```TODO-lang
 FIND_ENTRY();
-
+``` This is so that the semicolon that is normally written at the end of an expression will not go to waste. -h3. `st_add_direct()` +### `st_add_direct()` Continuing on, let us examine `st_add_direct()` which is a function that adds a new relationship to the hash table. This function does not check if the key is @@ -385,7 +402,7 @@ in the function name. ▼ `st_add_direct()` -
+```TODO-lang
  308  void
  309  st_add_direct(table, key, value)
  310      st_table *table;
@@ -400,7 +417,7 @@ in the function name.
  319  }
 
 (st.c)
-
+``` Just as before, the `do_hash()` macro that obtains a value is called here. After that, the next calculation is the same as at the start of @@ -411,7 +428,7 @@ Since the name is all uppercase, we can anticipate that is a macro. ▼ `ADD_DIRECT()` -
+```TODO-lang
  268  #define ADD_DIRECT(table, key, value, hash_val, bin_pos) \
  269  do {                                                     \
  270      st_table_entry *entry;                               \
@@ -434,7 +451,7 @@ Since the name is all uppercase, we can anticipate that is a macro.
  284  } while (0)
 
 (st.c)
-
+``` The first `if` is an exception case so I will explain it afterwards. @@ -443,10 +460,10 @@ The first `if` is an exception case so I will explain it afterwards. (B) Insert the `entry` into the start of the list. This is the idiom for handling the list. In other words, -
+```TODO-lang
 entry->next = list_beg;
 list_beg = entry;
-
+``` makes it possible to insert an entry to the front of the list. This is similar to "cons-ing" in the Lisp language. Check for yourself that even if `list_beg` @@ -456,7 +473,7 @@ Now, let me explain the code I left aside. ▼ `ADD_DIRECT()`-`rehash` -
+```TODO-lang
  271      if (table->num_entries / (table->num_bins)           \
                               > ST_DEFAULT_MAX_DENSITY) {      \
  272          rehash(table);                                   \
@@ -464,7 +481,7 @@ Now, let me explain the code I left aside.
  274      }                                                    \
 
 (st.c)
-
+``` `DENSITY` is "concentration". In other words, this conditional checks if the hash table is "crowded" or not. In the `st_table`, as the number of values that @@ -476,23 +493,23 @@ The current `ST_DEFAULT_MAX_DENSITY` is ▼ `ST_DEFAULT_MAX_DENSITY` -
+```TODO-lang
   23  #define ST_DEFAULT_MAX_DENSITY 5
 
 (st.c)
-
+``` Because of this setting, if in all `bin_pos` there are 5 `st_table_entries`, then the size will be increased. -h3. `st_insert()` +### `st_insert()` `st_insert()` is nothing more than a combination of `st_add_direct()` and `st_lookup()`, so if you understand those two, this will be easy. ▼ `st_insert()` -
+```TODO-lang
  286  int
  287  st_insert(table, key, value)
  288      register st_table *table;
@@ -516,26 +533,27 @@ h3. `st_insert()`
  306  }
 
 (st.c)
-
+``` It checks if the element is already registered in the table. Only when it is not registered will it be added. If there is an insertion, return 0. If there is no insertion, return a 1. -h2. `ID` and Symbols +`ID` and Symbols +================ I've already discussed what an `ID` is. It is a correspondence between an arbitrary string of characters and a value. It is used to declare various names. The actual data type is `unsigned int`. -h3. From `char*` to `ID` +### From `char*` to `ID` The conversion from string to `ID` is executed by `rb_intern()`. This function is rather long, so let's omit the middle. ▼ `rb_intern()` (simplified) -
+```TODO-lang
 5451  static st_table *sym_tbl;       /*  char* to ID   */
 5452  static st_table *sym_rev_tbl;   /*  ID to char*   */
 
@@ -563,7 +581,7 @@ is rather long, so let's omit the middle.
 5543  }
 
 (parse.y)
-
+``` The string and `ID` correspondence relationship can be accomplished by using the `st_table`. There probably isn't any especially difficult part here. @@ -574,7 +592,7 @@ parser, it is necessary to know the variable's classification from the `ID`. However, the fundamental part of `ID` is unrelated to this, so I won't explain it here. -h3. From `ID` to `char*` +### From `ID` to `char*` The reverse of `rb_intern()` is `rb_id2name()`, which takes an `ID` and generates a `char*`. You probably know this, but the 2 in `id2name` is "to". @@ -586,7 +604,7 @@ simplify it. ▼ `rb_id2name()` (simplified) -
+```TODO-lang
 char *
 rb_id2name(id)
     ID id;
@@ -597,7 +615,7 @@ rb_id2name(id)
         return name;
     return 0;
 }
-
+``` Maybe it seems that it is a little over-simplified, but in reality if we remove the details it really becomes this simple. @@ -613,7 +631,7 @@ not returned) on a value, then a Ruby object is used. I have not yet discussed it, but a Ruby object is automatically released when it is no longer needed, even if we are not taking care of the object. -h3. Converting `VALUE` and `ID` +### Converting `VALUE` and `ID` `ID` is shown as an instance of the `Symbol` class at the Ruby level. And it can be obtained like so: `"string".intern`. The implementation of @@ -621,7 +639,7 @@ And it can be obtained like so: `"string".intern`. The implementation of ▼ `rb_str_intern()` -
+```TODO-lang
 2996  static VALUE
 2997  rb_str_intern(str)
 2998      VALUE str;
@@ -638,7 +656,7 @@ And it can be obtained like so: `"string".intern`. The implementation of
 3009  }
 
 (string.c)
-
+``` This function is quite reasonable as a `ruby` class library code example. Please pay attention to the part where `RSTRING()` is used and casted, and @@ -653,7 +671,7 @@ The implementation is in `sym_to_s`. ▼ `sym_to_s()` -
+```TODO-lang
  522  static VALUE
  523  sym_to_s(sym)
  524      VALUE sym;
@@ -662,7 +680,7 @@ The implementation is in `sym_to_s`.
  527  }
 
 (object.c)
-
+``` `SYM2ID()` is the macro that converts `Symbol` (`VALUE`) to an `ID`. diff --git a/object.textile b/object.md similarity index 93% rename from object.textile rename to object.md index 5cf6bf3..4ec5a20 100644 --- a/object.textile +++ b/object.md @@ -4,11 +4,13 @@ title: Objects - Structure of Ruby objects --- Translated by Vincent ISAMBART -h1. Chapter 2: Objects +Chapter 2: Objects +------------------ -h2. Structure of Ruby objects +Structure of Ruby objects +========================= -h3. Guideline +### Guideline From this chapter, we will begin actually exploring the `ruby` source code. First, as declared at the beginning of this book, @@ -18,32 +20,35 @@ What are the necessary conditions for objects to be objects? There could be many ways to explain about object itself, but there are only three conditions that are truly indispensable. -# The ability to differentiate itself from other objects (an identity) -# The ability to respond to messages (methods) -# The ability to store internal state (instance variables) +* The ability to differentiate itself from other objects (an identity) +* The ability to respond to messages (methods) +* The ability to store internal state (instance variables) In this chapter, we are going to confirm these three features one by one. The target file is mainly `ruby.h`, but we will also briefly look at other files such as `object.c`, `class.c` or `variable.c`. -h3. `VALUE` and object struct +### `VALUE` and object struct In `ruby`, the body of an object is expressed by a struct and always handled via a pointer. A different struct type is used for each class, but the pointer type will always be `VALUE` (figure 1). -!images/ch_object_value.png(`VALUE` and struct)! +
+ figure 1: `VALUE` and struct +
figure 1: `VALUE` and struct
+
Here is the definition of `VALUE`: ▼ `VALUE` -
+```TODO-lang
   71  typedef unsigned long VALUE;
 
 (ruby.h)
-
+``` In practice, when using a `VALUE`, we cast it to the pointer to each object struct. Therefore if an `unsigned long` and a pointer have a different size, `ruby` @@ -72,13 +77,16 @@ a different struct is used based on the class of the object. For example, for a string object, `struct RString` is used, so we will have something like the following. -!images/ch_object_string.png(String object)! +
+ figure 2: String object +
figure 2: String object
+
Let's look at the definition of a few object structs. ▼ Examples of object struct -
+```TODO-lang
       /* struct for ordinary objects */
  295  struct RObject {
  296      struct RBasic basic;
@@ -108,7 +116,7 @@ Let's look at the definition of a few object structs.
  332  };
 
 (ruby.h)
-
+``` Before looking at every one of them in detail, let's begin with something more general. @@ -119,20 +127,23 @@ That's why `Rxxxx()` macros have been made for each object struct. For example, for `struct RString` there is `RSTRING()`, for `struct RArray` there is `RARRAY()`, etc... These macros are used like this: -
+```TODO-lang
 
 VALUE str = ....;
 VALUE arr = ....;
 RSTRING(str)->len;   /* ((struct RString*)str)->len */
 RARRAY(arr)->len;    /* ((struct RArray*)arr)->len */
-
+``` Another important point to mention is that all object structs start with a member `basic` of type `struct RBasic`. As a result, if you cast this `VALUE` to `struct RBasic*`, you will be able to access the content of `basic`, regardless of the type of struct pointed to by `VALUE`. -!images/ch_object_rbasic.png(`struct RBasic`)! +
+ figure 3: `struct RBasic` +
figure 3: `struct RBasic`
+
Because it is purposefully designed this way, `struct RBasic` must contain very important information for Ruby objects. @@ -141,24 +152,24 @@ for `struct RBasic`: ▼ `struct RBasic` -
+```TODO-lang
  290  struct RBasic {
  291      unsigned long flags;
  292      VALUE klass;
  293  };
 
 (ruby.h)
-
+``` `flags` are multipurpose flags, mostly used to register the struct type (for instance `struct RObject`). The type flags are named `T_xxxx`, and can be obtained from a `VALUE` using the macro `TYPE()`. Here is an example: -
+```TODO-lang
 VALUE str;
 str = rb_str_new();    /* creates a Ruby string (its struct is RString) */
 TYPE(str);             /* the return value is T_STRING */
-
+``` All the flags are named `T_xxxx`, like `T_STRING` for `struct RString` and `T_ARRAY` for `struct RArray`. @@ -168,7 +179,10 @@ The other member of `struct RBasic`, `klass`, contains the class this object belongs to. As the `klass` member is of type `VALUE`, what is stored is (a pointer to) a Ruby object. In short, it is a class object. -!images/ch_object_class.png(object and class)! +
+ figure 4: object and class +
figure 4: object and class
+
The relation between an object and its class will be detailed in the "Methods" section of this chapter. @@ -176,7 +190,7 @@ section of this chapter. By the way, this member is named `klass` so as not to conflict with the reserved word `class` when the file is processed by a C++ compiler. -h4. About struct types +#### About struct types I said that the type of struct is stored in the `flags` member of `struct RBasic`. But why do we have to store the type of struct? It's to be @@ -204,7 +218,7 @@ level use `struct RObject`, so finding a struct from a class would require to keep the correspondence between each class and struct. That's why it's easier and faster to put the information about the type in the struct. -h4. The use of `basic.flags` +#### The use of `basic.flags` Regarding the use of `basic.flags`, because I feel bad to say it is the struct type "and such", @@ -212,30 +226,33 @@ I'll illustrate it entirely here. (Figure 5) There is no need to understand everything right away, because this is prepared for the time when you will be wondering about it later. -!images/ch_object_flags.png(Use of `flags`)! +
+ figure 5: Use of `flags` +
figure 5: Use of `flags`
+
When looking at the diagram, it looks like that 21 bits are not used on 32 bit machines. On these additional bits, the flags `FL_USER0` to `FL_USER8` are defined, and are used for a different purpose for each struct. In the diagram I also put `FL_USER0` (`FL_SINGLETON`) as an example. -h3. Objects embedded in `VALUE` +### Objects embedded in `VALUE` As I said, `VALUE` is an `unsigned long`. As `VALUE` is a pointer, it may look like `void*` would also be all right, but there is a reason for not doing this. In fact, `VALUE` can also not be a pointer. The 6 cases for which `VALUE` is not a pointer are the following: -# small integers -# symbols -# `true` -# `false` -# `nil` -# `Qundef` +* small integers +* symbols +* `true` +* `false` +* `nil` +* `Qundef` I'll explain them one by one. -h4. Small integers +#### Small integers All data are objects in Ruby, thus integers are also objects. But since there are so many kind of integer objects, @@ -256,12 +273,12 @@ to a `Fixnum`, and confirm that `Fixnum` are directly embedded in `VALUE`. ▼ `INT2FIX` -
+```TODO-lang
  123  #define INT2FIX(i) ((VALUE)(((long)(i))<<1 | FIXNUM_FLAG))
  122  #define FIXNUM_FLAG 0x01
 
 (ruby.h)
-
+``` In brief, shift 1 bit to the left, and bitwise or it with 1. @@ -281,7 +298,7 @@ convert it to `Bignum`. `NUM2INT()` will convert both `Fixnum` and `Bignum` to `int`. If the number can't fit in an `int`, an exception will be raised, so there is no need to check the value range. -h4. Symbols +#### Symbols What are symbols? @@ -291,11 +308,11 @@ In the first place, there's a type named `ID` used inside `ruby`. Here it is. ▼ `ID` -
+```TODO-lang
   72  typedef unsigned long ID;
 
 (ruby.h)
-
+``` This `ID` is a number having a one-to-one association with a string. However, it's not possible to have an association between all strings in this world and @@ -325,12 +342,12 @@ why `Symbol`, like `Fixnum`, was made embedded in `VALUE`. Let's look at the ▼ `ID2SYM` -
+```TODO-lang
  158  #define SYMBOL_FLAG 0x0e
  160  #define ID2SYM(x) ((VALUE)(((long)(x))<<8|SYMBOL_FLAG))
 
 (ruby.h)
-
+``` When shifting 8 bits left, `x` becomes a multiple of 256, that means a multiple of 4. Then after with a bitwise or (in this case it's the same as @@ -342,16 +359,16 @@ Finally, let's see the reverse conversion of `ID2SYM()`, `SYM2ID()`. ▼ `SYM2ID()` -
+```TODO-lang
  161  #define SYM2ID(x) RSHIFT((long)x,8)
 
 (ruby.h)
-
+``` `RSHIFT` is a bit shift to the right. As a right shift may or may not preserve the sign depending on the platform, it was made a macro. -h4. `true false nil` +#### `true false nil` These three are Ruby special objects. `true` and `false` represent the boolean values. `nil` is an object used to denote that there is no object. Their @@ -359,13 +376,13 @@ values at the C level are defined like this: ▼ `true false nil` -
+```TODO-lang
  164  #define Qfalse 0        /* Ruby's false */
  165  #define Qtrue  2        /* Ruby's true */
  166  #define Qnil   4        /* Ruby's nil */
 
 (ruby.h)
-
+``` This time it's even numbers, but as 0 or 2 can't be used by pointers, they can't overlap with other `VALUE`. It's because usually the first block of virtual memory is not allocated, to make the programs dereferencing a `NULL` @@ -380,11 +397,11 @@ For `Qnil`, there is a macro dedicated to check if a `VALUE` is `Qnil` or not, ▼ `NIL_P()` -
+```TODO-lang
  170  #define NIL_P(v) ((VALUE)(v) == Qnil)
 
 (ruby.h)
-
+``` The name ending with `p` is a notation coming from Lisp denoting that it is a function returning a boolean value. In other words, `NIL_P` means "is the @@ -398,44 +415,45 @@ That's why there's the `RTEST()` macro to do Ruby-style test in C. ▼ `RTEST()` -
+```TODO-lang
  169  #define RTEST(v) (((VALUE)(v) & ~Qnil) != 0)
 
 (ruby.h)
-
+``` As in `Qnil` only the third lower bit is 1, in `~Qnil` only the third lower bit is 0. Then only `Qfalse` and `Qnil` become 0 with a bitwise and. `!=0` has been added to be certain to only have 0 or 1, to satisfy the requirements of the glib library that only wants 0 or 1 -("[ruby-dev:11049]":http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-dev/11049). +([[ruby-dev:11049]](http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-dev/11049)). By the way, what is the '`Q`' of `Qnil`? 'R' I would have understood but why '`Q`'? When I asked, the answer was "Because it's like that in Emacs." I did not have the fun answer I was expecting... -h4. `Qundef` +#### `Qundef` ▼ `Qundef` -
+```TODO-lang
  167  #define Qundef 6                /* undefined value for placeholder */
 
 (ruby.h)
-
+``` This value is used to express an undefined value in the interpreter. It can't (must not) be found at all at the Ruby level. -h2. Methods +Methods +======= I already brought up the three important points of a Ruby object: having an identity, being able to call a method, and keeping data for each instance. In this section, I'll explain in a simple way the structure linking objects and methods. -h3. `struct RClass` +### `struct RClass` In Ruby, classes exist as objects during the execution. Of course. So there must be a struct for class objects. That struct is `struct RClass`. Its @@ -447,7 +465,7 @@ differentiated by the `T_MODULE` struct flag. ▼ `struct RClass` -
+```TODO-lang
  300  struct RClass {
  301      struct RBasic basic;
  302      struct st_table *iv_tbl;
@@ -456,7 +474,7 @@ differentiated by the `T_MODULE` struct flag.
  305  };
 
 (ruby.h)
-
+``` First, let's focus on the `m_tbl` (Method TaBLe) member. `struct st_table` is a hashtable used everywhere in `ruby`. Its details will be explained in the @@ -483,9 +501,12 @@ which is the entity of `Kernel` object and the `super` of Kernel is `NULL`. So to put it conversely, if `super` is `NULL`, its `RClass` is the entity of `Kernel` (figure 6). -!images/ch_object_classtree.png(Class tree at the C level)! +
+ figure 6: Class tree at the C level +
figure 6: Class tree at the C level
+
-h3. Methods search +### Methods search With classes structured like this, you can easily imagine the method call process. The `m_tbl` of the object's class is searched, and if the method was @@ -497,7 +518,7 @@ The sequential search process in `m_tbl` is done by `search_method()`. ▼ `search_method()` -
+```TODO-lang
  256  static NODE*
  257  search_method(klass, id, origin)
  258      VALUE klass, *origin;
@@ -516,15 +537,15 @@ The sequential search process in `m_tbl` is done by `search_method()`.
  271  }
 
 (eval.c)
-
+``` This function searches the method named `id` in the class object `klass`. `RCLASS(value)` is the macro doing: -
+```TODO-lang
 ((struct RClass*)(value))
-
+``` `st_lookup()` is a function that searches in `st_table` the value corresponding to a key. If the value is found, the function returns true and @@ -535,12 +556,13 @@ too slow. That's why in reality, once called, a method is cached. So starting from the second time it will be found without following `super` one by one. This cache and its search will be seen in the 15th chapter "Methods." -h2. Instance variables +Instance variables +================== In this section, I will explain the implementation of the third essential condition, instance variables. -h3. `rb_ivar_set()` +### `rb_ivar_set()` Instance variable is the mechanism that allows each object to hold its specific data. Since it is specific to each object, @@ -550,7 +572,7 @@ but is it really so? Let's look at the function ▼ `rb_ivar_set()` -
+```TODO-lang
       /* assign val to the id instance variable of obj */
  984  VALUE
  985  rb_ivar_set(obj, id, val)
@@ -578,7 +600,7 @@ but is it really so? Let's look at the function
 1005  }
 
 (variable.c)
-
+``` `rb_raise()` and `rb_error_frozen()` are both error checks. This can always be said hereafter: @@ -588,13 +610,13 @@ Therefore, we should wholly ignore them at first read. After removing the error handling, only the `switch` remains, but -
+```TODO-lang
 switch (TYPE(obj)) {
   case T_aaaa:
   case T_bbbb:
      ...
 }
-
+``` this form is an idiom of `ruby`. `TYPE()` is the macro returning the type flag of the object struct (`T_OBJECT`, `T_STRING`, etc.). In other words as @@ -609,7 +631,7 @@ the basis that their second member is `iv_tbl`. Let's confirm it in practice. ▼ Structs whose second member is `iv_tbl` -
+```TODO-lang
       /* TYPE(val) == T_OBJECT */
  295  struct RObject {
  296      struct RBasic basic;
@@ -625,7 +647,7 @@ the basis that their second member is `iv_tbl`. Let's confirm it in practice.
  305  };
 
 (ruby.h)
-
+``` `iv_tbl` is the Instance Variable TaBLe. It records the correspondences between the instance variable names and their values. @@ -633,12 +655,12 @@ It records the correspondences between the instance variable names and their val In `rb_ivar_set()`, let's look again the code for the structs having `iv_tbl`. -
+```TODO-lang
 if (!ROBJECT(obj)->iv_tbl)
     ROBJECT(obj)->iv_tbl = st_init_numtable();
 st_insert(ROBJECT(obj)->iv_tbl, id, val);
 break;
-
+``` `ROBJECT()` is a macro that casts a `VALUE` into a `struct RObject*`. It's possible that what `obj` points to is actually a struct RClass, @@ -656,26 +678,26 @@ its instance variable table is for the class object itself. In Ruby programs, it corresponds to something like the following: -
+```TODO-lang
 class C
   @ivar = "content"
 end
-
+``` -h3. `generic_ivar_set()` +### `generic_ivar_set()` What happens when assigning to an instance variable of an object whose struct is not one of `T_OBJECT T_MODULE T_CLASS`? ▼ `rb_ivar_set()` in the case there is no `iv_tbl` -
+```TODO-lang
 1000  default:
 1001    generic_ivar_set(obj, id, val);
 1002    break;
 
 (variable.c)
-
+``` This is delegated to `generic_ivar_set()`. Before looking at this function, let's first explain its general idea. @@ -688,13 +710,16 @@ it would be able to have instance variables. In `ruby`, these associations are solved by using a global `st_table`, `generic_iv_table` (figure 7). -!images/ch_object_givtable.png(`generic_iv_table`)! +
+ figure 7: `generic_iv_table` +
figure 7: `generic_iv_table`
+
Let's see this in practice. ▼ `generic_ivar_set()` -
+```TODO-lang
  801  static st_table *generic_iv_tbl;
 
  830  static void
@@ -726,7 +751,7 @@ Let's see this in practice.
  853  }
 
 (variable.c)
-
+``` `rb_special_const_p()` is true when its parameter is not a pointer. However, as this `if` part requires knowledge of the garbage collector, we'll skip it @@ -764,7 +789,7 @@ variables. If `FL_EXIVAR` is not set, even without searching in `generic_iv_tbl`, we can see the object does not have any instance variables. And of course a bit check is way faster than searching a `struct st_table`. -h3. Gaps in structs +### Gaps in structs Now you understood the way to store the instance variables, but why are there structs without `iv_tbl`? Why is there no `iv_tbl` in @@ -808,14 +833,14 @@ such functionality looks stupid. If you take all this into consideration, you can conclude that increasing the size of object structs for `iv_tbl` does not do any good. -h3. `rb_ivar_get()` +### `rb_ivar_get()` We saw the `rb_ivar_set()` function that sets variables, so let's see quickly how to get them. ▼ `rb_ivar_get()` -
+```TODO-lang
  960  VALUE
  961  rb_ivar_get(obj, id)
  962      VALUE obj;
@@ -845,7 +870,7 @@ how to get them.
  982  }
 
 (variable.c)
-
+``` The structure is completely the same. @@ -873,19 +898,20 @@ parameter `obj` does not point to a struct. As no struct means no Thus `FL_xxxx()` is designed to always return false for such object. Hence, objects that are `rb_special_const_p()` should be treated specially here. -h2. Object Structs +Object Structs +============== In this section, about the important ones among object structs, we'll briefly see their concrete appearances and how to deal with them. -h3. `struct RString` +### `struct RString` `struct RString` is the struct for the instances of the `String` class and its subclasses. ▼ `struct RString` -
+```TODO-lang
  314  struct RString {
  315      struct RBasic basic;
  316      long len;
@@ -897,7 +923,7 @@ its subclasses.
  322  };
 
 (ruby.h)
-
+``` `ptr` is a pointer to the string, and `len` the length of that string. Very straightforward. @@ -912,10 +938,10 @@ you can access `ptr` and `len` by writing `RSTRING(str)->ptr` or `RSTRING(str)->len`, and it is allowed. But there are some points to pay attention to. -# you have to check if `str` really points to a `struct RString` +* you have to check if `str` really points to a `struct RString` by yourself beforehand -# you can read the members, but you must not modify them -# you can't store `RSTRING(str)->ptr` in something like a local variable and +* you can read the members, but you must not modify them +* you can't store `RSTRING(str)->ptr` in something like a local variable and use it later Why is that? First, there is an important software engineering principle: @@ -930,11 +956,11 @@ characteristics. Ruby's strings can be modified (are mutable). By mutable I mean after the following code: -
+```TODO-lang
 s = "str"        # create a string and assign it to s
 s.concat("ing")  # append "ing" to this string object
 p(s)             # show "string"
-
+``` the content of the object pointed by `s` will become "`string`". It's different from Java or Python string objects. Java's `StringBuffer` is closer. @@ -954,13 +980,13 @@ additional memory. So what is this other `aux.shared`? It's to speed up the creation of literal strings. Have a look at the following Ruby program. -
+```TODO-lang
 while true do  # repeat indefinitely
   a = "str"        # create a string with "str" as content and assign it to a
   a.concat("ing")  # append "ing" to the object pointed by a
   p(a)             # show "string"
 end
-
+``` Whatever the number of times you repeat the loop, the fourth line's `p` has to show `"string"`. @@ -988,7 +1014,7 @@ modifying strings created as literals, `aux.shared` has to be separated. Before ending this section, I'll write some examples of dealing with `RString`. I'd like you to regard `str` as a `VALUE` that points to `RString` when reading this. -
+```TODO-lang
 RSTRING(str)->len;               /* length */
 RSTRING(str)->ptr[0];            /* first character */
 str = rb_str_new("content", 7);  /* create a string with "content" as its content
@@ -996,16 +1022,16 @@ str = rb_str_new("content", 7);  /* create a string with "content" as its conten
 str = rb_str_new2("content");    /* create a string with "content" as its content
                                     its length is calculated with strlen() */
 rb_str_cat2(str, "end");         /* Concatenate a C string to a Ruby string */
-
+``` -h3. `struct RArray` +### `struct RArray` `struct RArray` is the struct for the instances of Ruby's array class `Array`. ▼ `struct RArray` -
+```TODO-lang
  324  struct RArray {
  325      struct RBasic basic;
  326      long len;
@@ -1017,7 +1043,7 @@ h3. `struct RArray`
  332  };
 
 (ruby.h)
-
+``` Except for the type of `ptr`, this structure is almost the same as `struct RString`. `ptr` points to the content of the array, and `len` is its @@ -1037,7 +1063,7 @@ With `RARRAY(arr)->ptr` and `RARRAY(arr)->len`, you can refer to the members, and it is allowed, but you must not assign to them, etc. We'll only look at simple examples: -
+```TODO-lang
 /* manage an array from C */
 VALUE ary;
 ary = rb_ary_new();             /* create an empty array */
@@ -1050,15 +1076,15 @@ ary = []      # create an empty array
 ary.push(9)   # push 9
 ary[0]        # look what's at index 0
 p(ary[0])     # do p on ary[0] (the result is 9)
-
+``` -h3. `struct RRegexp` +### `struct RRegexp` It's the struct for the instances of the regular expression class `Regexp`. ▼ `struct RRegexp` -
+```TODO-lang
  334  struct RRegexp {
  335      struct RBasic basic;
  336      struct re_pattern_buffer *ptr;
@@ -1067,7 +1093,7 @@ It's the struct for the instances of the regular expression class `Regexp`.
  339  };
 
 (ruby.h)
-
+``` `ptr` is the compiled regular expression. `str` is the string before compilation (the source code of the regular expression), and `len` is this @@ -1077,14 +1103,14 @@ As any code to handle `Regexp` objects doesn't appear in this book, we won't see how to use it. Even if you use it in extension libraries, as long as you do not want to use it a very particular way, the interface functions are enough. -h3. `struct RHash` +### `struct RHash` `struct RHash` is the struct for `Hash` object, which is Ruby's hash table. ▼ `struct RHash` -
+```TODO-lang
  341  struct RHash {
  342      struct RBasic basic;
  343      struct st_table *tbl;
@@ -1093,7 +1119,7 @@ which is Ruby's hash table.
  346  };
 
 (ruby.h)
-
+``` It's a wrapper for `struct st_table`. `st_table` will be detailed in the next chapter "Names and name tables." @@ -1101,25 +1127,25 @@ chapter "Names and name tables." `ifnone` is the value when a key does not have an associated value, its default is `nil`. `iter_lev` is to make the hashtable reentrant (multithread safe). -h3. `struct RFile` +### `struct RFile` `struct RFile` is a struct for instances of the built-in IO class and its subclasses. ▼ `struct RFile` -
+```TODO-lang
  348  struct RFile {
  349      struct RBasic basic;
  350      struct OpenFile *fptr;
  351  };
 
 (ruby.h)
-
+``` ▼ `OpenFile` -
+```TODO-lang
   19  typedef struct OpenFile {
   20      FILE *f;                    /* stdio ptr for read/write */
   21      FILE *f2;                   /* additional ptr for rw pipes */
@@ -1131,13 +1157,13 @@ its subclasses.
   27  } OpenFile;
 
 (rubyio.h)
-
+``` All members have been transferred in `struct OpenFile`. As there aren't many instances of `IO` objects, it's OK to do it like this. The purpose of each member is written in the comments. Basically, it's a wrapper around C's `stdio`. -h3. `struct RData` +### `struct RData` `struct RData` has a different tenor from what we saw before. It is the struct for implementation of extension libraries. @@ -1150,7 +1176,7 @@ for managing a pointer to a user defined struct" has been created on ▼ `struct RData` -
+```TODO-lang
  353  struct RData {
  354      struct RBasic basic;
  355      void (*dmark) _((void*));
@@ -1159,7 +1185,7 @@ for managing a pointer to a user defined struct" has been created on
  358  };
 
 (ruby.h)
-
+``` `data` is a pointer to the user defined struct, `dfree` is the function used to free that user defined struct, and @@ -1170,4 +1196,7 @@ the time being let's just look at its representation (figure 8). The detailed explanation of its members will be introduced after we'll finish chapter 5 "Garbage collection." -!images/ch_object_rdata.png(Representation of `struct RData`)! +
+ figure 8: Representation of `struct RData` +
figure 8: Representation of `struct RData`
+
diff --git a/parser.textile b/parser.md similarity index 94% rename from parser.textile rename to parser.md index d84eb7c..cb1bc50 100644 --- a/parser.textile +++ b/parser.md @@ -4,11 +4,13 @@ title: Parser --- Translated by Robert GRAVINA & ocha- -h1. Chapter 10: Parser +Chapter 10: Parser +------------------ -h2. Outline of this chapter +Outline of this chapter +======================= -h3. Parser construction +### Parser construction The main source of the parser is `parse.y`. Because it is `*.y`, it is the input for `yacc` @@ -25,15 +27,18 @@ using Windows who may not be aware, the `mv` (move) command creates a new copy of a file and removes the original. `cc` is, of course, the C compiler and `cpp` the C pre-processor. -!images/ch_parser_build.jpg(Parser construction process)! +
+ figure 1: Parser construction process +
figure 1: Parser construction process
+
-h3. Dissecting `parse.y` +### Dissecting `parse.y` Let's now look at `parse.y` in a bit more detail. The following figure presents a rough outline of the contents of `parse.y`. ▼ parse.y -
+```TODO-lang
 %{
 header
 %}
@@ -53,7 +58,7 @@ user code section
     semantic analysis
     local variable management
     ID implementation
-
+``` As for the rules and definitions part, it is as previously described. Since this part is indeed the heart of the parser, @@ -72,9 +77,10 @@ explained in this book. |Local variable management|Chapter 12 "Syntax tree construction"|Section 4 "Local variables"| |`ID` implementation|Chapter 3 "Names and name tables"|Section 2 "`ID` and symbols"| -h2. General remarks about grammar rules +General remarks about grammar rules +=================================== -h3. Coding rules +### Coding rules The grammar of `ruby` conforms to a coding standard and is thus easy to read once you are familiar with it. @@ -101,7 +107,7 @@ here stands for `large`. Since the reserved words `begin` and `end` already exist (naturally, with symbol names `kBEGIN` and `kEND`), these non-standard symbol names were required. -h3. Important symbols +### Important symbols `parse.y` contains both grammar rules and actions, however, for now I would like to concentrate on the grammar rules alone. The script sample/exyacc.rb can be @@ -113,7 +119,7 @@ modified version of `exyacc.rb`\footnote{modified `exyacc.rb`:`tools/exyacc2.rb` located on the attached CD-ROM} to extract the grammar rules. ▼ `parse.y`(rules) -
+```TODO-lang
 program         : compstmt
 
 bodystmt        : compstmt
@@ -124,7 +130,7 @@ bodystmt        : compstmt
 compstmt        : stmts opt_terms
                        :
                        :
-
+``` The output is quite long - over 450 lines of grammar rules - and as such I have only included the most important parts in this chapter. @@ -182,7 +188,7 @@ such as Lisp and Scheme, since everything is an expression, they do not have statements in the first place. Ruby is close to Lisp's design in this regard. -h3. Program structure +### Program structure Now let's turn our attention to the grammar rules of `ruby`. Firstly, in `yacc`, the left hand side of the first rule represents the entire grammar. @@ -194,7 +200,7 @@ With adding `arg` to them, let's look at their rules. ▼ `ruby` grammar (outline) -
+```TODO-lang
 program         : compstmt
 
 compstmt        : stmts opt_terms
@@ -234,7 +240,7 @@ primary         : literal
                     :
                 | kREDO
                 | kRETRY
-
+``` If we focus on the last rule of each element, we can clearly make out a hierarchy of `program`→`stmt`→`expr`→`arg`→ @@ -242,12 +248,12 @@ we can clearly make out a hierarchy of `program`→`stmt`→`expr`→`arg`→ Also, we'd like to focus on this rule of `primary`. -
+```TODO-lang
 primary         : literal
                     :
                     :
                 | tLPAREN_ARG expr  ')'      /* here */
-
+``` The name `tLPAREN_ARG` comes from `t` for terminal symbol, `L` for left and `PAREN` for parentheses - it is the open parenthesis. Why this isn't `'('` @@ -256,21 +262,27 @@ of this rule is to demote an `expr` to a `primary`. This creates a cycle which can be seen in Figure 2, and the arrow shows how this rule is reduced during parsing. -!images/ch_parser_exprloop.jpg(`expr` demotion)! +
+ figure 2: `expr` demotion +
figure 2: `expr` demotion
+
The next rule is also particularly interesting. -
+```TODO-lang
 primary         : literal
                     :
                     :
                 | tLPAREN compstmt ')'   /* here */
-
+``` A `compstmt`, which equals to the entire program (`program`), can be demoted to a `primary` with this rule. The next figure illustrates this rule in action. -!images/ch_parser_progloop.jpg(`program` demotion)! +
+ figure 3: `program` demotion +
figure 3: `program` demotion
+
This means that for any syntax element in Ruby, if we surround it with parentheses it will become a `primary` and can be passed as an argument to a @@ -278,21 +290,21 @@ function, be used as the right hand side of an expression etc. This is an incredible fact. Let's actually confirm it. -
+```TODO-lang
 p((class C; end))
 p((def a() end))
 p((alias ali gets))
 p((if true then nil else nil end))
 p((1 + 1 * 1 ** 1 - 1 / 1 ^ 1))
-
+``` If we invoke `ruby` with the `-c` option (syntax check), we get the following output. -
+```TODO-lang
 % ruby -c primprog.rb
 Syntax OK
-
+``` Indeed, it's hard to believe, but it actually passes. @@ -310,10 +322,10 @@ rule does hold. In the next section I will cover the contents of the important elements one by one. -h3. `program` +### `program` ▼ `program` -
+```TODO-lang
 program         : compstmt
 
 compstmt        : stmts opt_terms
@@ -321,7 +333,7 @@ compstmt        : stmts opt_terms
 stmts           : none
                 | stmt
                 | stmts terms stmt
-
+``` As mentioned earlier, `program` represents the entire grammar that means the entire program. @@ -335,7 +347,7 @@ terminate the sentences, such as semicolons or newlines. `opt_terms` means "OPTional terms". The definitions are as follows: ▼ `opt_terms` -
+```TODO-lang
 opt_terms       :
                 | terms
 
@@ -344,23 +356,23 @@ terms           : term
 
 term            : ';'
                 | '\n'
-
+``` The initial `;` or `\n` of a `terms` can be followed by any number of `;` only; based on that, you might start thinking that if there are 2 or more consecutive newlines, it could cause a problem. Let's try and see what actually happens. -
+```TODO-lang
 1 + 1   # first newline
         # second newline
         # third newline
 1 + 1
-
+``` Run that with `ruby -c`. -
+```TODO-lang
 % ruby -c optterms.rb
 Syntax OK
-
+``` Strange, it worked! What actually happens is this: consecutive newlines are simply discarded by the scanner, which returns only the first newline in a series. @@ -368,12 +380,12 @@ By the way, although we said that `program` is the same as `compstmt`, if that w To generalize this point, the grammar rules can be divided into 2 groups: those which are needed for parsing the program structure, and those which are needed for execution of semantic actions. The `none` rule which was mentioned earlier when talking about `stmts` is another one which exists for executing actions -- it's used to return a `NULL` pointer for an empty list of type `NODE*`. -h3. `stmt` +### `stmt` Next is `stmt`. This one is rather involved, so we'll look into it a bit at a time. ▼ `stmt`(1) -
+```TODO-lang
 stmt            : kALIAS fitem  fitem
                 | kALIAS tGVAR tGVAR
                 | kALIAS tGVAR tBACK_REF
@@ -386,7 +398,7 @@ stmt            : kALIAS fitem  fitem
                 | stmt kRESCUE_MOD stmt
                 | klBEGIN '{' compstmt '}'
                 | klEND '{' compstmt '}'
-
+``` Looking at that, somehow things start to make sense. The first few have `alias`, then `undef`, then the next few are all something followed by `_MOD` -- those should be statements with postposition modifiers, as you can imagine. @@ -395,7 +407,7 @@ Looking at that, somehow things start to make sense. The first few have `alias`, As explained earlier, `klBEGIN` and `klEND` represent `BEGIN` and `END`. ▼ `stmt`(2) -
+```TODO-lang
                 | lhs '=' command_call
                 | mlhs '=' command_call
                 | var_lhs tOP_ASGN command_call
@@ -404,7 +416,7 @@ As explained earlier, `klBEGIN` and `klEND` represent `BEGIN` and `END`.
                 | primary_value '.' tCONSTANT tOP_ASGN command_call
                 | primary_value tCOLON2 tIDENTIFIER tOP_ASGN command_call
                 | backref tOP_ASGN command_call
-
+``` Looking at these rules all at once is the right approach. The common point is that they all have `command_call` on the right-hand side. `command_call` represents a method call with the parentheses omitted. The new symbols which are introduced here are explained in the following table. I hope you'll refer to the table as you check over each grammar rule. @@ -427,10 +439,10 @@ This abbreviation is used at a lot of places in the source code of `ruby`.

▼ `stmt` (3)

-
+```TODO-lang
                 | lhs '=' mrhs_basic
                 | mlhs '=' mrhs
-
+``` These two are multiple assignments. @@ -439,9 +451,9 @@ We've come to recognize that knowing the meanings of names makes the comprehensi

▼ `stmt` (4)

-
+```TODO-lang
                 | expr
-
+``` Lastly, it joins to `expr`. @@ -449,11 +461,11 @@ Lastly, it joins to `expr`. -h3. `expr` +### `expr`

▼ `expr`

-
+```TODO-lang
 expr            : kRETURN call_args
                 | kBREAK call_args
                 | kNEXT call_args
@@ -463,7 +475,7 @@ expr            : kRETURN call_args
                 | kNOT expr
                 | '!' command_call
                 | arg
-
+``` Expression. The expression of `ruby` is very small in grammar. @@ -479,10 +491,10 @@ it would cause conflicts tremendously. However, these two below are of different kind. -
+```TODO-lang
 expr kAND expr
 expr kOR expr
-
+``` `kAND` is "`and`", and `kOR` is "`or`". @@ -493,19 +505,19 @@ at least they need to be `expr` to go well. For example, the following usage is possible ... -
+```TODO-lang
   valid_items.include? arg  or raise ArgumentError, 'invalid arg'
 # valid_items.include?(arg) or raise(ArgumentError, 'invalid arg')
-
+``` However, if the rule of `kOR` existed in `arg` instead of `expr`, it would be joined as follows. -
+```TODO-lang
 valid_items.include?((arg or raise)) ArgumentError, 'invalid arg'
-
+``` Obviously, this would end up a parse error. @@ -513,11 +525,11 @@ Obviously, this would end up a parse error. -h3. `arg` +### `arg`

▼ `arg`

-
+```TODO-lang
 arg             : lhs '=' arg
                 | var_lhs tOP_ASGN arg
                 | primary_value '[' aref_args ']' tOP_ASGN arg
@@ -557,7 +569,7 @@ arg             : lhs '=' arg
                 | kDEFINED opt_nl  arg
                 | arg '?' arg ':' arg
                 | primary
-
+``` Although there are many rules here, the complexity of the grammar is not @@ -572,14 +584,14 @@ this is virtually only a mere enumeration. Let's cut the "mere enumeration" out from the `arg` rule by merging. -
+```TODO-lang
 arg: lhs '=' arg              /* 1 */
    | primary T_opeq arg       /* 2 */
    | arg T_infix arg          /* 3 */
    | T_pre arg                /* 4 */
    | arg '?' arg ':' arg      /* 5 */
    | primary                  /* 6 */
-
+``` There's no meaning to distinguish terminal symbols from lists of terminal symbols, @@ -620,19 +632,19 @@ The conclusion is all requirements are met and this grammar does not conflict. We could say it's a matter of course. -h3. `primary` +### `primary` Because `primary` has a lot of grammar rules, we'll split them up and show them in parts.

▼ `primary` (1)

-
+```TODO-lang
 primary         : literal
                 | strings
                 | xstring
                 | regexp
                 | words
                 | qwords
-
+``` Literals. @@ -640,11 +652,11 @@ Literals.

▼ `primary` (2)

-
+```TODO-lang
                 | var_ref
                 | backref
                 | tFID
-
+``` Variables. @@ -656,30 +668,30 @@ even if it appears solely, it becomes a method call at the parser level.

▼ `primary` (3)

-
+```TODO-lang
                 | kBEGIN
                   bodystmt
                   kEND
-
+``` `bodystmt` contains `rescue` and `ensure`. It means this is the `begin` of the exception control.

▼ `primary` (4)

-
+```TODO-lang
                 | tLPAREN_ARG expr  ')'
                 | tLPAREN compstmt ')'
-
+``` This has already been described. Syntax demoting.

▼ `primary` (5)

-
+```TODO-lang
                 | primary_value tCOLON2 tCONSTANT
                 | tCOLON3 cname
-
+``` Constant references. `tCONSTANT` is for constant names (capitalized identifiers). @@ -695,28 +707,28 @@ is to deal with the methods without parentheses. For example, it is to distinguish the next two from each other: -
+```TODO-lang
 p Net::HTTP    # p(Net::HTTP)
 p Net  ::HTTP  # p(Net(::HTTP))
-
+``` If there's a space or a delimiter character such as an open parenthesis just before it, it becomes `tCOLON3`. In the other cases, it becomes `tCOLON2`.

▼ `primary` (6)

-
+```TODO-lang
                 | primary_value '[' aref_args ']'
-
+``` Index-form calls, for instance, `arr[i]`.

▼ `primary` (7)

-
+```TODO-lang
                 | tLBRACK aref_args ']'
                 | tLBRACE assoc_list '}'
-
+``` Array literals and Hash literals. This `tLBRACK` represents also `'['`, @@ -738,13 +750,13 @@ so I'd like you to make use of it when reading.

▼ `primary` (8)

-
+```TODO-lang
                 | kRETURN
                 | kYIELD '(' call_args ')'
                 | kYIELD '(' ')'
                 | kYIELD
                 | kDEFINED opt_nl '('  expr ')'
-
+``` Syntaxes whose forms are similar to method calls. @@ -758,9 +770,9 @@ it does not mean you cannot pass values, of course. There was the following rule in `expr`. -
+```TODO-lang
 kRETURN call_args
-
+``` `call_args` is a bare argument list, @@ -771,9 +783,9 @@ surrounding the multiple arguments of a `return` with parentheses as in the following code should be impossible. -
+```TODO-lang
 return(1, 2, 3)   # interpreted as return  (1,2,3) and results in parse error
-
+``` You could understand more about around here @@ -782,11 +794,11 @@ the next chapter "Finite-State Scanner".

▼ `primary` (9)

-
+```TODO-lang
                 | operation brace_block
                 | method_call
                 | method_call brace_block
-
+``` Method calls. `method_call` is with arguments (also with parentheses), @@ -800,7 +812,7 @@ the next chapter "Finite-State Scanner".

▼ `primary` (10)

-
+```TODO-lang
   | kIF expr_value then compstmt if_tail kEND         # if
   | kUNLESS expr_value then compstmt opt_else kEND    # unless
   | kWHILE expr_value do compstmt kEND                # while
@@ -808,7 +820,7 @@ the next chapter "Finite-State Scanner".
   | kCASE expr_value opt_terms case_body kEND         # case
   | kCASE opt_terms case_body kEND                    # case(Form2)
   | kFOR block_var kIN expr_value do compstmt kEND    # for
-
+``` The basic control structures. @@ -818,9 +830,9 @@ Because `primary` is also `arg`, we can also do something like this. -
+```TODO-lang
 p(if true then 'ok' end)   # shows "ok"
-
+``` @@ -835,14 +847,14 @@ In the next section, we'll think about this point again.

▼ `primary` (11)

-
+```TODO-lang
   | kCLASS cname superclass bodystmt kEND        # class definition
   | kCLASS tLSHFT expr term bodystmt kEND        # singleton class definition
   | kMODULE cname bodystmt kEND                  # module definition
   | kDEF fname f_arglist bodystmt kEND           # method definition
   | kDEF singleton dot_or_colon fname f_arglist bodystmt kEND
                                                  # singleton method definition
-
+``` Definition statements. @@ -854,12 +866,12 @@ it would never be a problem.

▼ `primary` (12)

-
+```TODO-lang
                 | kBREAK
                 | kNEXT
                 | kREDO
                 | kRETRY
-
+``` Various jumps. @@ -869,7 +881,7 @@ These are, well, not important from the viewpoint of grammar. -h3. Conflicting Lists +### Conflicting Lists In the previous section, the question "is it all right that `if` is in such @@ -879,7 +891,7 @@ but explaining instinctively is relatively easy. Here, let's simulate with a small rule defined as follows: -
+```TODO-lang
 %token A B o
 %%
 element   : A item_list B
@@ -889,7 +901,7 @@ item_list :
 
 item      : element
           | o
-
+``` `element` is the element that we are going to examine. @@ -903,9 +915,9 @@ For an element of the list, the `o` or `element` is nesting. With the parser based on this grammar, let's try to parse the following input. -
+```TODO-lang
 A  A  o  o  o  B  o  A  o  A  o  o  o  B  o  B  B
-
+``` They are nesting too many times for humans to comprehend without some helps such as indents. @@ -915,13 +927,16 @@ them are going to appear, replace them to a single `o` when they appear. All we have to do is repeating this procedure. Figure 4 shows the consequence. -!images/ch_parser_ablist.jpg(parse a list which starts with A and ends with B)! +
+ figure 4: parse a list which starts with A and ends with B +
figure 4: parse a list which starts with A and ends with B
+
However, if the ending `B` is missing, ... -
+```TODO-lang
 %token A o
 %%
 element   : A item_list    /* B is deleted for an experiment */
@@ -931,7 +946,7 @@ item_list :
 
 item      : element
           | o
-
+``` I processed this with `yacc` and got 2 shift/reduce conflicts. @@ -940,9 +955,9 @@ If we simply take `B` out from the previous one, The input would be as follows. -
+```TODO-lang
 A  A  o  o  o  o  A  o  A  o  o  o  o
-
+``` This is hard to interpret in any way. However, there was a rule that "choose @@ -951,7 +966,10 @@ let's follow it as an experiment and parse the input with shift (meaning interior) which takes precedence. (Figure 5) -!images/ch_parser_alist.jpg(parse a list of lists which start with A)! +
+ figure 5: parse a list of lists which start with A +
figure 5: parse a list of lists which start with A
+
It could be parsed. However, this is completely different from the intention of @@ -972,17 +990,21 @@ That's why `yacc` could not be used for ordinary HTML at all. -h2. Scanner +Scanner +======= -h3. Parser Outline +### Parser Outline I'll explain about the outline of the parser before moving on to the scanner. Take a look at Figure 6. -!images/ch_parser_interf.jpg(Parser Interface (Call Graph))! +
+ figure 6: Parser Interface (Call Graph) +
figure 6: Parser Interface (Call Graph)
+
There are three official interfaces of the parser: `rb_compile_cstr()`, @@ -1019,13 +1041,16 @@ and keeps them until it will form a token. Therefore, the whole structure of `yylex` can be depicted as Figure 7. -!images/ch_parser_scanner.jpg(The whole picture of the scanner)! +
+ figure 7: The whole picture of the scanner +
figure 7: The whole picture of the scanner
+
-h3. The input buffer +### The input buffer Let's start with the input buffer. Its interfaces are only the three: `nextc()`, `pushback()`, `peek()`. @@ -1037,25 +1062,28 @@ The variables used by the input buffer are the followings:

▼ the input buffer

-
+```TODO-lang
 2279  static char *lex_pbeg;
 2280  static char *lex_p;
 2281  static char *lex_pend;
 
 (parse.y)
-
+``` The beginning, the current position and the end of the buffer. Apparently, this buffer seems a simple single-line string buffer (Figure 8). -!images/ch_parser_ibuffer.jpg(The input buffer)! +
+ figure 8: The input buffer +
figure 8: The input buffer
+
-h4. `nextc()` +#### `nextc()` Then, let's look at the places using them. @@ -1063,7 +1091,7 @@ First, I'll start with `nextc()` that seems the most orthodox.

▼ `nextc()`

-
+```TODO-lang
 2468  static inline int
 2469  nextc()
 2470  {
@@ -1098,7 +1126,7 @@ First, I'll start with `nextc()` that seems the most orthodox.
 2499  }
 
 (parse.y)
-
+``` It seems that the first `if` is to test if it reaches the end of the input buffer. @@ -1116,7 +1144,7 @@ it's definite that each line comes in at a time. Here is the summary: -
+```TODO-lang
 if ( reached the end of the buffer )
     if (still there's more input)
         read the next line
@@ -1125,7 +1153,7 @@ if ( reached the end of the buffer )
 move the pointer forward
 skip reading CR of CRLF
 return c
-
+``` Let's also look at the function `lex_getline()`, which provides lines. @@ -1133,7 +1161,7 @@ The variables used by this function are shown together in the following.

▼ `lex_getline()`

-
+```TODO-lang
 2276  static VALUE (*lex_gets)();     /* gets function */
 2277  static VALUE lex_input;         /* non-nil if File */
 
@@ -1148,7 +1176,7 @@ The variables used by this function are shown together in the following.
 2428  }
 
 (parse.y)
-
+``` Except for the first line, this is not important. @@ -1158,7 +1186,7 @@ I searched the place where setting `lex_gets` and this is what I found:

▼ set `lex_gets`

-
+```TODO-lang
 2430  NODE*
 2431  rb_compile_string(f, s, line)
 2432      const char *f;
@@ -1179,7 +1207,7 @@ I searched the place where setting `lex_gets` and this is what I found:
 2461      lex_input = file;
 
 (parse.y)
-
+``` @@ -1192,7 +1220,7 @@ On the other hand, `lex_get_str()` is defined as follows:

▼ `lex_get_str()`

-
+```TODO-lang
 2398  static int lex_gets_ptr;
 
 2400  static VALUE
@@ -1216,7 +1244,7 @@ On the other hand, `lex_get_str()` is defined as follows:
 2418  }
 
 (parse.y)
-
+``` `lex_gets_ptr` remembers the place it has already read. @@ -1234,7 +1262,7 @@ and absorbed. There was also a similar method of `st_table`. -h4. `pushback()` +#### `pushback()` With the knowledge of the physical structure of the buffer and `nextc`, @@ -1242,7 +1270,7 @@ we can understand the rest easily. `pushback()` writes back a character. If put it in C, it is `ungetc()`.

▼ `pushback()`

-
+```TODO-lang
 2501  static void
 2502  pushback(c)
 2503      int c;
@@ -1252,27 +1280,27 @@ we can understand the rest easily.
 2507  }
 
 (parse.y)
-
+``` -h4. `peek()` +#### `peek()` `peek()` checks the next character without moving the pointer forward.

▼ `peek()`

-
+```TODO-lang
 2509  #define peek(c) (lex_p != lex_pend && (c) == *lex_p)
 
 (parse.y)
-
+``` -h3. The Token Buffer +### The Token Buffer The token buffer is the buffer of the next level. @@ -1290,12 +1318,12 @@ There are the five interfaces as follows: Now, we'll start with the data structures.

▼ The Token Buffer

-
+```TODO-lang
 2271  static char *tokenbuf = NULL;
 2272  static int   tokidx, toksiz = 0;
 
 (parse.y)
-
+``` `tokenbuf` is the buffer, `tokidx` is the end of the token @@ -1305,7 +1333,10 @@ This is also simply structured. If depicting it, it would look like Figure 9. -!images/ch_parser_tbuffer.jpg(The token buffer)! +
+ figure 9: The token buffer +
figure 9: The token buffer
+
Let's continuously go to the interface and @@ -1313,7 +1344,7 @@ read `newtok()`, which starts a new token.

▼ `newtok()`

-
+```TODO-lang
 2516  static char*
 2517  newtok()
 2518  {
@@ -1330,7 +1361,7 @@ read `newtok()`, which starts a new token.
 2529  }
 
 (parse.y)
-
+``` The initializing interface of the whole buffer does not exist, @@ -1350,7 +1381,7 @@ Next, let's look at the `tokadd()` to add a character to token buffer.

▼ `tokadd()`

-
+```TODO-lang
 2531  static void
 2532  tokadd(c)
 2533      char c;
@@ -1363,7 +1394,7 @@ Next, let's look at the `tokadd()` to add a character to token buffer.
 2540  }
 
 (parse.y)
-
+``` At the first line, a character is added. @@ -1376,21 +1407,21 @@ as `calloc()`. The rest interfaces are summarized below.

▼ `tokfix() tok() toklen() toklast()`

-
+```TODO-lang
 2511  #define tokfix() (tokenbuf[tokidx]='\0')
 2512  #define tok() tokenbuf
 2513  #define toklen() tokidx
 2514  #define toklast() (tokidx>0?tokenbuf[tokidx-1]:0)
 
 (parse.y)
-
+``` There's probably no question. -h3. `yylex()` +### `yylex()` `yylex()` is very long. Currently, there are more than 1000 lines. @@ -1400,7 +1431,7 @@ First, I'll show the whole structure that some parts of it are left out.

▼ `yylex` outline

-
+```TODO-lang
 3106  static int
 3107  yylex()
 3108  {
@@ -1459,7 +1490,7 @@ First, I'll show the whole structure that some parts of it are left out.
       }
 
 (parse.y)
-
+``` As for the return value of `yylex()`, @@ -1480,14 +1511,14 @@ but it is easy if you will amplify the same pattern. -h4. `'!'` +#### `'!'` Let's start with what is simple first.

▼ `yylex` - `'!'`

-
+```TODO-lang
 3205        case '!':
 3206          lex_state = EXPR_BEG;
 3207          if ((c = nextc()) == '=') {
@@ -1500,14 +1531,14 @@ Let's start with what is simple first.
 3214          return '!';
 
 (parse.y)
-
+``` I wrote out the meaning of the code, so I'd like you to read them by comparing each other. -
+```TODO-lang
 case '!':
   move to EXPR_BEG
   if (the next character is '=' then) {
@@ -1518,7 +1549,7 @@ case '!':
   }
   if it is neither, push the read character back
   token is '!'
-
+``` This `case` clause is short, but describes the important rule of the scanner. @@ -1540,14 +1571,14 @@ its next symbol is the beginning of an expression. -h4. `'<'` +#### `'<'` Next, we'll try to look at `'<'` as an example of using `yylval` (the value of a symbol).

▼ `yylex`−`'>'`

-
+```TODO-lang
 3296        case '>':
 3297          switch (lex_state) {
 3298            case EXPR_FNAME: case EXPR_DOT:
@@ -1571,7 +1602,7 @@ Next, we'll try to look at `'<'` as an example of using `yylval` (the value of a
 3316          return '>';
 
 (parse.y)
-
+``` The places except for `yylval` can be ignored. @@ -1596,7 +1627,7 @@ It is because they differs in their precedences. -h4. `':'` +#### `':'` If scanning is completely independent from parsing, this talk would be simple. @@ -1608,7 +1639,7 @@ The code of `':'` shown below is an example that a space changes the behavior.

▼ `yylex`−`':'`

-
+```TODO-lang
 3761        case ':':
 3762          c = nextc();
 3763          if (c == ':') {
@@ -1631,7 +1662,7 @@ The code of `':'` shown below is an example that a space changes the behavior.
 3778          return tSYMBEG;
 
 (parse.y)
-
+``` Again, ignoring things relating to `lex_state`, @@ -1645,7 +1676,7 @@ This is as I explained at `primary` in the previous section. -h4. Identifier +#### Identifier Until now, since there were only symbols, @@ -1656,7 +1687,7 @@ It is the scanning pattern of identifiers. First, the outline of `yylex` was as follows: -
+```TODO-lang
 yylex(...)
 {
     switch (c = nextc()) {
@@ -1669,7 +1700,7 @@ yylex(...)
 
    the scanning code of identifiers
 }
-
+``` The next code is an extract from the end of the huge `switch`. @@ -1677,7 +1708,7 @@ This is relatively long, so I'll show it with comments.

▼ `yylex` -- identifiers

-
+```TODO-lang
 4081        case '@':                 /* an instance variable or a class variable */
 4082          c = nextc();
 4083          newtok();
@@ -1733,7 +1764,7 @@ This is relatively long, so I'll show it with comments.
 4131      tokfix();
 
 (parse.y)
-
+``` Finally, I'd like you focus on the condition @@ -1741,10 +1772,10 @@ at the place where adding `!` or `?`. This part is to interpret in the next way. -
+```TODO-lang
 obj.m=1       # obj.m  =   1       (not obj.m=)
 obj.m!=1      # obj.m  !=  1       (not obj.m!)
-
+``` ((errata: this code is not relating to that condition)) @@ -1755,7 +1786,7 @@ Sometimes, you can refuse it. -h4. The reserved words +#### The reserved words After scanning the identifiers, there are about 100 lines of the code further @@ -1780,7 +1811,7 @@ Usually, only the data would be separated to a list or a hash in order to keep the code short. -
+```TODO-lang
 /* convert the code to data */
 struct entry {char *name; int symbol;};
 struct entry *table[] = {
@@ -1794,7 +1825,7 @@ struct entry *table[] = {
     ....
     return lookup_symbol(table, tok());
 }
-
+``` Then, how `ruby` is doing is that, it uses a hash table. @@ -1837,11 +1868,11 @@ The definition of `struct kwtable` is as follows:

▼ `kwtable`

-
+```TODO-lang
    1  struct kwtable {char *name; int id[2]; enum lex_state state;};
 
 (keywords)
-
+``` `name` is the name of the reserved word, `id[0]` is its symbol, @@ -1854,7 +1885,7 @@ This is the place where actually looking up.

▼ `yylex()` -- identifier -- call `rb_reserved_word()`

-
+```TODO-lang
 4173                  struct kwtable *kw;
 4174
 4175                  /* See if it is a reserved word.  */
@@ -1862,25 +1893,25 @@ This is the place where actually looking up.
 4177                  if (kw) {
 
 (parse.y)
-
+``` -h3. Strings +### Strings The double quote (`"`) part of `yylex()` is this.

▼ `yylex` − `'"'`

-
+```TODO-lang
 3318        case '"':
 3319          lex_strterm = NEW_STRTERM(str_dquote, '"', 0);
 3320          return tSTRING_BEG;
 
 (parse.y)
-
+``` Surprisingly it finishes after scanning only the first character. @@ -1889,7 +1920,7 @@ Then, this time, when taking a look at the rule,

▼ rules for strings

-
+```TODO-lang
 string1         : tSTRING_BEG string_contents tSTRING_END
 
 string_contents :
@@ -1905,7 +1936,7 @@ string_dvar     : tGVAR
                 | backref
 
 term_push       :
-
+``` These rules are the part introduced to deal with embedded expressions inside of strings. @@ -1914,9 +1945,9 @@ These rules are the part introduced to deal with embedded expressions inside of `tSTRING_DVAR` represents "`#` that in front of a variable". For example, -
+```TODO-lang
 ".....#$gvar...."
-
+``` this kind of syntax. @@ -1941,18 +1972,18 @@ the next `yylex()`. What plays an important role there is ... -
+```TODO-lang
       case '"':
         lex_strterm = NEW_STRTERM(str_dquote, '"', 0);
         return tSTRING_BEG;
-
+``` ... `lex_strterm`. Let's go back to the beginning of `yylex()`.

▼ the beginning of `yylex()`

-
+```TODO-lang
 3106  static int
 3107  yylex()
 3108  {
@@ -1971,7 +2002,7 @@ What plays an important role there is ...
 3136      switch (c = nextc()) {
 
 (parse.y)
-
+``` If `lex_strterm` exists, it enters the string mode without asking. @@ -1984,7 +2015,7 @@ This is done in the following part:

▼ `string_content`

-
+```TODO-lang
 1916  string_content  : ....
 1917                  | tSTRING_DBEG term_push
 1918                      {
@@ -2006,7 +2037,7 @@ This is done in the following part:
 1934                      }
 
 (parse.y)
-
+``` In the embedded action, `lex_strterm` is saved as the value of `tSTRING_DBEG` @@ -2032,7 +2063,7 @@ if `bison` is assumed, it causes a little cumbersome. -h4. `lex_strterm` +#### `lex_strterm` As we've seen, when you consider `lex_strterm` as a boolean value, @@ -2042,11 +2073,11 @@ First, let's look at its type.

▼ `lex_strterm`

-
+```TODO-lang
   72  static NODE *lex_strterm;
 
 (parse.y)
-
+``` This definition shows its type is `NODE*`. @@ -2058,12 +2089,12 @@ you should remember only these two points.

▼ `NEW_STRTERM()`

-
+```TODO-lang
 2865  #define NEW_STRTERM(func, term, paren) \
 2866          rb_node_newnode(NODE_STRTERM, (func), (term), (paren))
 
 (parse.y)
-
+``` This is a macro to create a node to be stored in `lex_strterm`. @@ -2075,9 +2106,9 @@ and if it is a `'` string, it is `'`. `paren` is used to store the corresponding parenthesis when it is a `%` string. For example, -
+```TODO-lang
 %Q(..........)
-
+``` in this case, `paren` stores `'('`. And, `term` stores the closing parenthesis `')'`. @@ -2089,7 +2120,7 @@ The available types are decided as follows:

▼ `func`

-
+```TODO-lang
 2775  #define STR_FUNC_ESCAPE 0x01  /* backslash notations such as \n are in effect  */
 2776  #define STR_FUNC_EXPAND 0x02  /* embedded expressions are in effect */
 2777  #define STR_FUNC_REGEXP 0x04  /* it is a regular expression */
@@ -2106,7 +2137,7 @@ The available types are decided as follows:
 2788  };
 
 (parse.y)
-
+``` Each meaning of `enum string_type` is as follows: @@ -2122,14 +2153,14 @@ Each meaning of `enum string_type` is as follows: -h4. String scan function +#### String scan function The rest is reading `yylex()` in the string mode, in other words, the `if` at the beginning.

▼ `yylex`− string

-
+```TODO-lang
 3114      if (lex_strterm) {
 3115          int token;
 3116          if (nd_type(lex_strterm) == NODE_HEREDOC) {
@@ -2151,7 +2182,7 @@ in other words, the `if` at the beginning.
 3132      }
 
 (parse.y)
-
+``` It is divided into the two major groups: here document and others. @@ -2171,7 +2202,7 @@ I'd like readers who are interested in to try to look over it. -h4. Here Document +#### Here Document In comparison to the ordinary strings, here documents are fairly interesting. @@ -2181,7 +2212,7 @@ First, I'll show the code of `yylex()` to scan the starting symbol of a here doc

▼ `yylex`−`'<'`

-
+```TODO-lang
 3260        case '<':
 3261          c = nextc();
 3262          if (c == '<' &&
@@ -2194,7 +2225,7 @@ First, I'll show the code of `yylex()` to scan the starting symbol of a here doc
 3269              if (token) return token;
 
 (parse.y)
-
+``` @@ -2205,7 +2236,7 @@ Therefore, here is `heredoc_identifier()`.

▼ `heredoc_identifier()`

-
+```TODO-lang
 2926  static int
 2927  heredoc_identifier()
 2928  {
@@ -2222,14 +2253,17 @@ Therefore, here is `heredoc_identifier()`.
 2988  }
 
 (parse.y)
-
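+``` The part which reads the starting symbol (`<<EOS`) is not important, so it is totally left out. Until now, the input buffer probably has become as depicted as Figure 10. Let's recall that the input buffer reads a line at a time. -!images/ch_parser_lexparams.jpg(scanning `"printf\(<<EOS,n\)"`)! +
+ figure 10: scanning `"printf\(<<EOS,n\)"` +
figure 10: scanning `"printf\(<<EOS,n\)"`
+
+ What `heredoc_identifier()` is doing is as follows: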
@@ -2244,13 +2278,13 @@ read line) and `len` (the length that has already read) are saved. Then, the dynamic call graph before and after `heredoc_identifier` is simply shown below: -
+```TODO-lang
 yyparse
     yylex(case '<')
         heredoc_identifier(lex_strterm = ....)
     yylex(the beginning if)
         here_document
-
+``` And, this `here_document()` is doing the scan of the body of the here document. @@ -2260,7 +2294,7 @@ Notice that `lex_strterm` remains unchanged after it was set at `heredoc_identif

▼ `here_document()`(simplified)

-
+```TODO-lang
 here_document(NODE *here)
 {
     VALUE line;                      /* the line currently being scanned */
@@ -2286,7 +2320,7 @@ here_document(NODE *here)
     yylval.node = NEW_STR(str);
     return tSTRING_CONTENT;
 }
-
+``` `rb_str_cat()` is the function to connect a `char*` at the end of a Ruby string. It means that the currently being read line `lex_lastline` is connected to @@ -2304,7 +2338,7 @@ And finally, leaving the `do` ~ `while` loop, it is `heredoc_restore()`.

▼ `heredoc_restore()`

-
+```TODO-lang
 2990  static void
 2991  heredoc_restore(here)
 2992      NODE *here;
@@ -2321,7 +2355,7 @@ And finally, leaving the `do` ~ `while` loop, it is `heredoc_restore()`.
 3003  }
 
 (parse.y)
-
+``` `here->nd_orig` holds the line which contains the starting symbol.
@@ -2331,4 +2365,7 @@ It means it can continue to scan from the just after the starting symbol as if there was nothing happened. (Figure 11) -!images/ch_parser_heredoc.jpg(The picture of assignation of scanning Here Document)! +
+ figure 11: The picture of assignation of scanning Here Document +
figure 11: The picture of assignation of scanning Here Document
+
diff --git a/preface.textile b/preface.md similarity index 95% rename from preface.textile rename to preface.md index c0951e9..c7e4142 100644 --- a/preface.textile +++ b/preface.md @@ -2,7 +2,8 @@ layout: default --- -h2. Preface +Preface +======= This book explores several themes with the following goals in mind: @@ -81,7 +82,8 @@ the book that lets you practice such idealism exhaustively. "It's interesting because it's difficult." I'm glad if the number of people who think so will increase because of this book. -h2. Target audience +Target audience +=============== Firstly, knowledge about the Ruby language isn't required. However, since the knowledge of the Ruby language is absolutely necessary to understand certain @@ -99,7 +101,8 @@ seriously, without having any experience of using at least one of object-oriented languages, you will probably have a difficult time. In this book, I tried to use many examples in Java and C++. -h2. Structure of this book +Structure of this book +====================== This book has four main parts: @@ -119,7 +122,7 @@ Now, we are going through the overview of the four main parts. The symbol in parentheses after the explanation indicates the difficulty gauge. They are ==(C)==, (B), (A) in order of easy to hard, (S) being the highest. -h4. Part 1: Object +#### Part 1: Object | Chapter1 | Focuses the basics of Ruby to get ready to accomplish Part 1. ==(C)== | | Chapter2 | Gives concrete inner structure of Ruby objects. ==(C)== | @@ -129,7 +132,7 @@ h4. Part 1: Object | Chapter6 | Describes the implementation of global variables, class variables, and constants. ==(C)== | | Chapter7 | Outline of the security features of Ruby. ==(C)== | -h4. Part 2: Syntactic analysis +#### Part 2: Syntactic analysis | Chapter8 | Talks about almost complete specification of the Ruby language, in order to prepare for Part 2 and Part 3. ==(C)== | | Chapter9 | Introduction to @yacc@ required to read the syntax file at least. (B) | @@ -137,7 +140,7 @@ h4. 
Part 2: Syntactic analysis | Chapter11 | Explore around the peripherals of @lex_state@, which is the most difficult part of the parser. The most difficult part of this book. (S) | | Chapter12 | Finalization of Part 2 and connection to Part 3. ==(C)== | -h4. Part 3: Evaluator +#### Part 3: Evaluator | Chapter13 | Describe the basic mechanism of the evaluator. ==(C)== | | Chapter14 | Reads the evaluation stack that creates the main context of Ruby. (A) | @@ -145,12 +148,13 @@ h4. Part 3: Evaluator | Chapter16 | Defies the implementation of the iterator, the most characteristic feature of Ruby. (A) | | Chapter17 | Describe the implementation of the eval methods. (B) | -h4. Part 4: Peripheral around the evaluator +#### Part 4: Peripheral around the evaluator | Chapter18 | Run-time loading of libraries in C and Ruby. (B) | | Chapter19 | Describes the implementation of thread at the end of the core part. (A) | -h2. Environment +Environment +=========== This book describes on @ruby@ 1.7.3 2002-09-12 version. It's attached on the CD-ROM. Choose any one of @ruby-rhg.tar.gz@, @ruby-rhg.lzh@, or @ruby-rhg.zip@ @@ -227,7 +231,8 @@ However, the author owes the responsibility for this test. Please refrain from attempting to contact these people directly. If there's any flaw in execution, please be advised to contact the author by e-mail: `aamine@loveruby.net`. -h2. Web site +Web site +======== The web site for this book is `http://i.loveruby.net/ja/rhg/`. I will add information about related programs and additional documentation, as @@ -236,7 +241,8 @@ this book at the same time of the release. I will look for a certain circumstance to publicize more chapters, and the whole contents of the book will be at this website at the end. -h2. Acknowledgment +Acknowledgment +============== First of all, I would like to thank Mr. Yukihiro Matsumoto. He is the author of Ruby, and he made it in public as an open source software. Not only he @@ -292,10 +298,10 @@ p(right). 
Minero Aoki If you want to send remarks, suggestions and reports of typographcal errors, -please address to "Minero Aoki <aamine@loveruby.net>":mailto:aamine@loveruby.net . +please address to [Minero Aoki <aamine@loveruby.net>](mailto:aamine@loveruby.net). "Rubyソースコード完全解説" can be reserved/ordered at ImpressDirect. -"(Jump to the introduction page)":http://direct.ips.co.jp/directsys/go_x_TempChoice.cfm?sh_id=EE0040&spm_id=1&GM_ID=1721 +[(Jump to the introduction page)](http://direct.ips.co.jp/directsys/go_x_TempChoice.cfm?sh_id=EE0040&spm_id=1&GM_ID=1721) Copyright (c) 2002-2004 Minero Aoki, All rights reserved. diff --git a/security.textile b/security.md similarity index 95% rename from security.textile rename to security.md index 5ce7948..137241c 100644 --- a/security.textile +++ b/security.md @@ -4,9 +4,10 @@ title: Security --- Translated by Clifford Escobar CAOILE & ocha- -h1. Chapter 7: Security +Chapter 7: Security +------------------- -h3. Fundamentals +### Fundamentals I say security but I don't mean passwords or encryption. The Ruby security feature is used for handling untrusted objects in a environment like CGI @@ -47,7 +48,7 @@ We can skip 0 and move on to explain in detail levels 2 and 4. "Level 2 has no use currently" is right.)) -h4. Level 1 +#### Level 1 This level is for dangerous data, for example, in normal CGI applications, etc. @@ -60,7 +61,7 @@ exception to be raised and the attempt will be stopped. This tainted mark is "infectious". For example, when taking a part of a tainted string, that part is also tainted. -h4. Level 4 +#### Level 4 This level is for dangerous programs, for example, running external (unknown) programs, etc. @@ -71,7 +72,7 @@ I/O, thread manipulation, redefining methods, etc. Of course, the tainted mark information is used, but basically the operations are the criteria. -h4. Unit of Security +#### Unit of Security `$SAFE` looks like a global variable but is in actuality a thread local variable. 
In other words, Ruby's security system works on units @@ -84,7 +85,7 @@ program, then it should be made into a different thread and have its security level raised. I haven't yet explained how to create a thread, but I will show an example here: -
+```TODO-lang
 # Raise the security level in a different thread
 p($SAFE)   # 0 is the default
 Thread.fork {    # Start a different thread
@@ -92,9 +93,9 @@ Thread.fork {    # Start a different thread
     eval(str)    # Run the dangerous program
 }
 p($SAFE)   # Outside of the block, the level is still 0
-
+``` -h4. Reliability of `$SAFE` +#### Reliability of `$SAFE` Even with implementing the spreading of tainted marks, or restricting operations, ultimately it is still handled manually. In other words, @@ -115,7 +116,7 @@ open. Therefore it is prudent to think that `ruby` can probably be dangerous. -h3. Implementation +### Implementation From now on, we'll start to look into its implementation. @@ -137,7 +138,7 @@ The APIs to check are mainly these below two: We won't read `SafeStringValue()` here. -h4. Tainted Mark +#### Tainted Mark The taint mark is, to be concrete, the `FL_TAINT` flag, which is set to @@ -145,11 +146,11 @@ The taint mark is, to be concrete, the `FL_TAINT` flag, which is set to Here is its usage. -
+```TODO-lang
 OBJ_TAINT(obj)            /* set FL_TAINT to obj */
 OBJ_TAINTED(obj)          /* check if FL_TAINT is set to obj */
 OBJ_INFECT(dest, src)     /* infect FL_TAINT from src to dest */
-
+``` Since `OBJ_TAINT()` and `OBJ_TAINTED()` can be assumed not important, @@ -157,14 +158,14 @@ let's briefly look over only `OBJ_INFECT()`.

▼ `OBJ_INFECT`

-
+```TODO-lang
  441  #define OBJ_INFECT(x,s) do {                             \
           if (FL_ABLE(x) && FL_ABLE(s))                        \
               RBASIC(x)->flags |= RBASIC(s)->flags & FL_TAINT; \
       } while (0)
 
 (ruby.h)
-
+``` `FL_ABLE()` checks if the argument `VALUE` is a pointer or not. @@ -174,11 +175,11 @@ it would propagate the flag. -h4. $SAFE +#### $SAFE

▼ `ruby_safe_level`

-
+```TODO-lang
  124  int ruby_safe_level = 0;
 
 7401  static void
@@ -196,7 +197,7 @@ h4. $SAFE
 7413  }
 
 (eval.c)
-
+``` The substance of `$SAFE` is `ruby_safe_level` in `eval.c`. @@ -219,11 +220,11 @@ you can ignore the interface and modify the security level. -h4. `rb_secure()` +#### `rb_secure()`

▼ `rb_secure()`

-
+```TODO-lang
  136  void
  137  rb_secure(level)
  138      int level;
@@ -235,7 +236,7 @@ h4. `rb_secure()`
  144  }
 
 (eval.c)
-
+``` If the current safe level is more than or equal to `level`, diff --git a/spec.textile b/spec.md similarity index 91% rename from spec.textile rename to spec.md index 22ce53f..31a731b 100644 --- a/spec.textile +++ b/spec.md @@ -13,7 +13,8 @@ reading this. A complete exposition can be found in the Readers who know Ruby can skip over this chapter. -h2. Literals +Literals +======== The expressiveness of Ruby's literals is extremely high. In my opinion, what makes Ruby a script language @@ -31,21 +32,21 @@ by constructing straightforwardly. What kind of expressions are valid? Let's look at them one by one. -h3. Strings +### Strings Strings and regular expressions can't be missing in a scripting language. The expressiveness of Ruby's string is very various even more than the other Ruby's literals. -h4. Single Quoted Strings +#### Single Quoted Strings -
+```TODO-lang
 'string'              # 「string」
 '\\begin{document}'   # 「\begin{document}」
 '\n'                  # 「\n」backslash and an n, not a newline
 '\1'                  # 「\1」backslash and 1
 '\''                  # 「'」
-
+``` This is the simplest form. In C, what enclosed in single quotes becomes a character, @@ -59,29 +60,29 @@ And Ruby's strings aren't divided by newline characters. If we write a string over several lines the newlines are contained in the string. -
+```TODO-lang
 'multi
     line
         string'
-
+``` And if the `-K` option is given to the `ruby` command, multibyte strings will be accepted. At present the three encodings EUC-JP (`-Ke`), Shift JIS (`-Ks`), and UTF8 (`-Ku`) can be specified. -
+```TODO-lang
 '「漢字が通る」と「マルチバイト文字が通る」はちょっと違う'
 # 'There's a little difference between "Kanji are accepted" and "Multibyte characters are accepted".'
-
+``` -h4. Double Quoted Strings +#### Double Quoted Strings -
+```TODO-lang
 "string"              # 「string」
 "\n"                  # newline
 "\x0f"               # a byte given in hexadecimal form
 "page#{n}.html"       # embedding a command
-
+``` With double quotes we can use command expansion and backslash notation. The backslash notation is something classical that is also supported in C, @@ -99,21 +100,21 @@ are no limitations like only one variable or only one method. Getting this far, it is not a mere literal anymore but the entire thing can be considered as an expression to express a string. -
+```TODO-lang
 "embedded #{lvar} expression"
 "embedded #{@ivar} expression"
 "embedded #{1 + 1} expression"
 "embedded #{method_call(arg)} expression"
 "embedded #{"string in string"} expression"
-
+``` -h4. Strings with `%` +#### Strings with `%` -
+```TODO-lang
 %q(string)            # same as 'string'
 %Q(string)            # same as "string"
 %(string)             # same as %Q(string) or "string"
-
+``` If a lot of separator characters appear in a string, escaping all of them becomes a burden. In that case the separator characters can be @@ -121,10 +122,10 @@ changed by using `%`. In the following example, the same string is written as a `"`-string and `%`-string. -
+```TODO-lang
 "<a href=\"http://i.loveruby.net#{path}\">"
 %Q(<a href="http://i.loveruby.net#{path}">)
-
+``` The both expressions has the same length, but the `%`-one is a lot nicer to look at. @@ -135,13 +136,13 @@ Here we have used parentheses as delimiters, but something else is fine, too. Like brackets or braces or `#`. Almost every symbol is fine, even `%`. -
+```TODO-lang
 %q#this is string#
 %q[this is string]
 %q%this is string%
-
+``` -h4. Here Documents +#### Here Documents Here document is a syntax which can express strings spanning multiple lines. A normal string starts right after the delimiter `"` @@ -150,7 +151,7 @@ When using here document, the lines between the line which contains the starting `< +```TODO-lang "the characters between the starting symbol and the ending symbol will become a string." @@ -159,7 +160,7 @@ All lines between the starting and the ending line are in this here document EOS -
+``` Here we used `EOS` as identifier but any word is fine. Precisely speaking, all the character matching `[a-zA-Z_0-9]` and multi-byte @@ -172,18 +173,18 @@ Therefore, the position of the start identifier in the line is not important. Taking advantage of this, it doesn't matter that, for instance, it is written in the middle of an expression: -
+```TODO-lang
 printf(<<EOS, count_n(str))
 count=%d
 EOS
-
+```
 
 In this case the string `"count=%d\n"` goes in the place of `<<EOS`.
 So it is the same as the following.
 
-
+```TODO-lang
 printf("count=%d\n", count_n(str))
-
+``` The position of the starting identifier is really not restricted, but on the contrary, there are strict @@ -192,26 +193,26 @@ and there must not be another letter in that line. However if we write the start symbol with a minus like this `<<-EOS` we can indent the line with the end symbol. -
+```TODO-lang
      <<-EOS
 It would be convenient if one could indent the content
 of a here document. But that's not possible.
 If you want that, writing a method to delete indents is
 usually a way to go. But beware of tabs.
      EOS
-
+``` Furthermore, the start symbol can be enclosed in single or double quotes. Then the properties of the whole here document change. When we change `< +```TODO-lang <<"EOS" One day is #{24 * 60 * 60} seconds. Incredible. EOS - +``` But `<<'EOS'` is not the same as a single quoted string. It starts the complete literal mode. Everything even backslashes go @@ -221,36 +222,36 @@ contains many backslashes. In Part 2, I'll explain how to parse a here document. But I'd like you to try to guess it before. -h3. Characters +### Characters Ruby strings are byte sequences, there are no character objects. Instead there are the following expressions which return the integers which correspond a certain character in ASCII code. -
+```TODO-lang
 ?a                    # the integer which corresponds to "a"
 ?.                    # the integer which corresponds to "."
 ?\n                   # LF
 ?\C-a                 # Ctrl-a
-
+``` -h3. Regular Expressions +### Regular Expressions -
+```TODO-lang
 /regexp/
 /^Content-Length:/i
 /正規表現/
 /\/\*.*?\*\//m        # An expression which matches C comments
 /reg#{1 + 1}exp/      # the same as /reg2exp/
-
+``` What is contained between slashes is a regular expression. Regular expressions are a language to designate string patterns. For example -
+```TODO-lang
 /abc/
-
+``` This regular expression matches a string where there's an `a` followed by a `b` followed by a `c`. It matches "abc" or "fffffffabc" or @@ -258,9 +259,9 @@ by a `b` followed by a `c`. It matches "abc" or "fffffffabc" or One can designate more special patterns. -
+```TODO-lang
 /^From:/
-
+``` This matches a string where there's a `From` followed by a `:` at the beginning of a line. There are several more expressions of this kind, @@ -290,24 +291,24 @@ To describe regular expression in detail, it's so large that one more can be written, so I'd like you to read another book for this subject. I recommend "Mastering Regular Expression" by Jeffrey E.F. Friedl. -h4. Regular Expressions with `%` +#### Regular Expressions with `%` Also as with strings, regular expressions also have a syntax for changing delimiters. In this case it is `%r`. To understand this, looking at some examples are enough to understand. -
+```TODO-lang
 %r(regexp)
 %r[/\*.*?\*/]            # matches a C comment
 %r("(?:[^"\\]+|\\.)*")   # matches a string in C
 %r{reg#{1 + 1}exp}       # embedding a Ruby expression
-
+``` -h3. Arrays +### Arrays A comma-separated list enclosed in brackets `[]` is an array literal. -
+```TODO-lang
 [1, 2, 3]
 ['This', 'is', 'an', 'array', 'of', 'string']
 
@@ -316,7 +317,7 @@ A comma-separated list enclosed in brackets `[]` is an array literal.
 lvar = $gvar = @ivar = @@cvar = nil
 [lvar, $gvar, @ivar, @@cvar]
 [Object.new(), Object.new(), Object.new()]
-
+``` Ruby's array (`Array`) is a list of arbitrary objects. From a syntactical standpoint, it's characteristic is that arbitrary expressions can be elements. @@ -328,45 +329,45 @@ together can also be written straightforwardly. Note that this is "an expression which generates an array object" as with the other literals. -
+```TODO-lang
 i = 0
 while i < 5
   p([1,2,3].id)    # Each time another object id is shown.
   i += 1
 end
-
+``` -h4. Word Arrays +#### Word Arrays When writing scripts one uses arrays of strings a lot, hence there is a special notation only for arrays of strings. That is `%w`. With an example it's immediately obvious. -
+```TODO-lang
 %w( alpha beta gamma delta )   # ['alpha','beta','gamma','delta']
 %w( 月 火 水 木 金 土 日 )
 %w( Jan Feb Mar Apr May Jun
     Jul Aug Sep Oct Nov Dec )
-
+``` There's also `%W` where expressions can be embedded. It's a feature implemented fairly recently. -
+```TODO-lang
 n = 5
 %w( list0 list#{n} )   # ['list0', 'list#{n}']
 %W( list0 list#{n} )   # ['list0', 'list5']
-
+``` The author hasn't come up with a good use of `%W` yet. -h3. Hashes +### Hashes Hash tables are data structure which store a one-to-one relation between arbitrary objects. By writing as follows, they will be expressions to generate tables. -
+```TODO-lang
 { 'key' => 'value', 'key2' => 'value2' }
 { 3 => 0, 'string' => 5, ['array'] => 9 }
 { Object.new() => 3, Object.new() => 'string' }
@@ -375,7 +376,7 @@ By writing as follows, they will be expressions to generate tables.
 { 0 => 0,
   1 => 3,
   2 => 6 }
-
+``` We explained hashes in detail in the third chapter "Names and Nametables". They are fast lookup tables which allocate memory slots depending @@ -386,32 +387,32 @@ Furthermore, when used as an argument of a method call, the `{...}` can be omitted under a certain condition. -
+```TODO-lang
   some_method(arg, key => value, key2 => value2)
 # some_method(arg, {key => value, key2 => value2}) # same as above
-
+``` With this we can imitate named (keyword) arguments. -
+```TODO-lang
 button.set_geometry('x' => 80, 'y' => '240')
-
+``` Of course in this case `set_geometry` must accept a hash as input. Though real keyword arguments will be transformed into parameter variables, it's not the case for this because this is just a "imitation". -h3. Ranges +### Ranges Range literals are oddballs which don't appear in most other languages. Here are some expressions which generate Range objects. -
+```TODO-lang
 0..5          # from 0 to 5 containing 5
 0...5         # from 0 to 5 not containing 5
 1+2 .. 9+0    # from 3 to 9 containing 9
 'a'..'z'      # strings from 'a' to 'z' containing 'z'
-
+``` If there are two dots the last element is included. If there are three dots it is not included. Not only integers but also floats @@ -427,69 +428,70 @@ it would be a runtime error. By the way, because the precedence of `..` and `...` is quite low, sometimes it is interpreted in a surprising way. -
+```TODO-lang
 1..5.to_a()   # 1..(5.to_a())
-
+``` I think my personality is relatively bent for Ruby grammar, but somehow I don't like only this specification. -h3. Symbols +### Symbols In Part 1, we talked about symbols at length. It's something corresponds one-to-one to an arbitrary string. In Ruby symbols are expressed with a `:` in front. -
+```TODO-lang
 :identifier
 :abcde
-
+``` These examples are pretty normal. Actually, besides them, all variable names and method names can become symbols with a `:` in front. Like this: -
+```TODO-lang
 :$gvar
 :@ivar
 :@@cvar
 :CONST
-
+``` Moreover, though we haven't talked this yet, `[]` or `attr=` can be used as method names, so naturally they can also be used as symbols. -
+```TODO-lang
 :[]
 :attr=
-
+``` When one uses these symbols as values in an array, it'll look quite complicated. -h3. Numerical Values +### Numerical Values This is the least interesting. One possible thing I can introduce here is that, when writing a million, -
+```TODO-lang
 1_000_000
-
+``` as written above, we can use underscore delimiters in the middle. But even this isn't particularly interesting. From here on in this book, we'll completely forget about numerical values. -h2. Methods +Methods +======= Let's talk about the definition and calling of methods. -h3. Definition and Calls +### Definition and Calls -
+```TODO-lang
 def some_method( arg )
   ....
 end
@@ -499,24 +501,24 @@ class C
     ....
   end
 end
-
+``` Methods are defined with `def`. If they are defined at toplevel they become function style methods, inside a class they become methods of this class. To call a method which was defined in a class, one usually has to create an instance with `new` as shown below. -
+```TODO-lang
 C.new().some_method(0)
-
+``` -h3. The Return Value of Methods +### The Return Value of Methods The return value of a method is, if a `return` is executed in the middle, its value. Otherwise, it's the value of the statement which was executed last. -
+```TODO-lang
 def one()     # 1 is returned
   return 1
   999
@@ -534,34 +536,34 @@ def three()   # 3 is returned
     999
   end
 end
-
+``` If the method body is empty, it would automatically be `nil`, and an expression without a value cannot put at the end. Hence every method has a return value. -h3. Optional Arguments +### Optional Arguments Optional arguments can also be defined. If the number of arguments doesn't suffice, the parameters are automatically assigned to default values. -
+```TODO-lang
 def some_method( arg = 9 )  # default value is 9
   p arg
 end
 
 some_method(0)    # 0 is shown.
 some_method()     # The default value 9 is shown.
-
+``` There can also be several optional arguments. But in that case they must all come at the end of the argument list. If elements in the middle of the list were optional, how the correspondences of the arguments would be very unclear. -
+```TODO-lang
 def right_decl( arg1, arg2, darg1 = nil, darg2 = nil )
   ....
 end
@@ -570,76 +572,76 @@ end
 def wrong_decl( arg, default = nil, arg2 )  # A middle argument cannot be optional
   ....
 end
-
+``` -h3. Omitting argument parentheses +### Omitting argument parentheses In fact, the parentheses of a method call can be omitted. -
+```TODO-lang
 puts 'Hello, World!'   # puts("Hello, World")
 obj = Object.new       # obj = Object.new()
-
+``` In Python we can get the method object by leaving out parentheses, but there is no such thing in Ruby. If you'd like to, you can omit more parentheses. -
+```TODO-lang
   puts(File.basename fname)
 # puts(File.basename(fname)) same as the above
-
+``` If we like we can even leave out more -
+```TODO-lang
   puts File.basename fname
 # puts(File.basename(fname))  same as the above
-
+``` However, recently this kind of "nested omissions" became a cause of warnings. It's likely that this will not pass anymore in Ruby 2.0. Actually even the parentheses of the parameters definition can also be omitted. -
+```TODO-lang
 def some_method param1, param2, param3
 end
 
 def other_method    # without arguments ... we see this a lot
 end
-
+``` Parentheses are often left out in method calls, but leaving out parentheses in the definition is not very popular. However if there are no arguments, the parentheses are frequently omitted. -h3. Arguments and Lists +### Arguments and Lists Because Arguments form a list of objects, there's nothing odd if we can do something converse: extracting a list (an array) as arguments, as the following example. -
+```TODO-lang
 def delegate(a, b, c)
   p(a, b, c)
 end
 
 list = [1, 2, 3]
 delegate(*list)   # identical to delegate(1, 2, 3)
-
+``` In this way we can distribute an array into arguments. Let's call this device a `*`argument now. Here we used a local variable for demonstration, but of course there is no limitation. We can also directly put a literal or a method call instead. -
+```TODO-lang
 m(*[1,2,3])    # We could have written the expanded form in the first place...
 m(*mcall())
-
+``` The @*@ argument can be used together with ordinary arguments, but the @*@ argument must come last. @@ -649,7 +651,7 @@ single way. In the definition on the other hand we can handle the arguments in bulk when we put a `*` in front of the parameter variable. -
+```TODO-lang
 def some_method( *args )
   p args
 end
@@ -657,23 +659,23 @@ end
 some_method()          # prints []
 some_method(0)         # prints [0]
 some_method(0, 1)      # prints [0,1]
-
+``` The surplus arguments are gathered in an array. Only one `*`parameter can be declared. It must also come after the default arguments. -
+```TODO-lang
 def some_method0( arg, *rest )
 end
 def some_method1( arg, darg = nil, *rest )
 end
-
+``` If we combine list expansion and bulk reception together, the arguments of one method can be passed as a whole to another method. This might be the most practical use of the `*`parameter. -
+```TODO-lang
 # a method which passes its arguments to other_method
 def delegate(*args)
   other_method(*args)
@@ -685,9 +687,9 @@ end
 
 delegate(0, 1, 2)      # same as other_method(0, 1, 2)
 delegate(10, 20, 30)   # same as other_method(10, 20, 30)
-
+``` -h3. Various Method Call Expressions +### Various Method Call Expressions Being just a single feature as 'method call' does not mean its representation is also single. Here is about so-called syntactic sugar. @@ -695,7 +697,7 @@ In Ruby there is a ton of it, and they are really attractive for a person who has a fetish for parsers. For instance the examples below are all method calls. -
+```TODO-lang
 1 + 2                   # 1.+(2)
 a == b                  # a.==(b)
 ~/regexp/               # /regexp/.~
@@ -703,13 +705,13 @@ obj.attr = val          # obj.attr=(val)
 obj[i]                  # obj.[](i)
 obj[k] = v              # obj.[]=(k,v)
 `cvs diff abstract.rd`  # Kernel.`('cvs diff abstract.rd')
-
+``` It's hard to believe until you get used to it, but `attr=`, `[]=`, `\`` are (indeed) all method names. They can appear as names in a method definition and can also be used as symbols. -
+```TODO-lang
 class C
   def []( index )
   end
@@ -719,7 +721,7 @@ end
 p(:attr=)
 p(:[]=)
 p(:`)
-
+``` As there are people who don't like sweets, there are also many people who dislike syntactic sugar. Maybe they feel unfair when the things which are @@ -728,12 +730,12 @@ essentially the same appear in faked looks. Let's see some more details. -h4. Symbol Appendices +#### Symbol Appendices -
+```TODO-lang
 obj.name?
 obj.name!
-
+``` First a small thing. It's just appending a `?` or a `!`. Call and Definition do not differ, so it's not too painful. There are convention for what @@ -742,11 +744,11 @@ It's just a convention at human level. This is probably influenced from Lisp in which a great variety of characters can be used in procedure names. -h4. Binary Operators +#### Binary Operators -
+```TODO-lang
 1 + 2    # 1.+(2)
-
+``` Binary Operators will be converted to a method call to the object on the left hand side. Here the method `+` from the object `1` is called. @@ -754,7 +756,7 @@ As listed below there are many of them. There are the general operators `+` and `-`, also the equivalence operator `==` and the spaceship operator `<=>' as in Perl, all sorts. They are listed in order of their precedence. -
+```TODO-lang
 **
 * / %
 + -
@@ -763,18 +765,18 @@ As listed below there are many of them. There are the general operators
 | ^
 > >= < <=
 <=> == === =~
-
+``` The symbols `&` and `|` are methods, but the double symbols `&&` and `||` are built-in operators. Remember how it is in C. -h4. Unary Operators +#### Unary Operators -
+```TODO-lang
 +2
 -1.0
 ~/regexp/
-
+``` These are the unary operators. There are only three of them: `+ - ~`. `+` and `-` work as they look like (by default). @@ -789,17 +791,17 @@ Of course they can be called by just writing `+n` or `-n`. part of the literal. This is a kind of optimizations.)) -h4. Attribute Assignment +#### Attribute Assignment -
+```TODO-lang
 obj.attr = val   # obj.attr=(val)
-
+``` This is an attribute assignment fashion. The above will be translated into the method call `attr=`. When using this together with method calls whose parentheses are omitted, we can write code which looks like attribute access. -
+```TODO-lang
 class C
   def i() @i end          # We can write the definition in one line
   def i=(n) @i = n end
@@ -808,7 +810,7 @@ end
 c = C.new
 c.i = 99
 p c.i    # prints 99
-
+``` However it will turn out both are method calls. They are similar to get/set property in Delphi or slot accessors in CLOS. @@ -816,23 +818,23 @@ They are similar to get/set property in Delphi or slot accessors in CLOS. Besides, we cannot define a method such as `obj.attr(arg)=`, which can take another argument in the attribute assignment fashion. -h4. Index Notation +#### Index Notation -
+```TODO-lang
 obj[i]    # obj.[](i)
-
+``` The above will be translated into a method call for `[]`. Array and hash access are also implemented with this device. -
+```TODO-lang
 obj[i] = val   # obj.[]=(i, val)
-
+``` Index assignment fashion. This is translated into a call for a method named `[]=`. -h3. `super` +### `super` We relatively often have a situation where we want add a little bit to the behaviour of an already @@ -841,7 +843,7 @@ Here a mechanism to call a method of the superclass when overwriting a method is required. In Ruby, that's `super`. -
+```TODO-lang
 class A
   def test
     puts 'in A'
@@ -852,7 +854,7 @@ class B < A
     super   # invokes A#test
   end
 end
-
+``` Ruby's `super` differs from the one in Java. This single word means "call the method with the same name in the superclass". @@ -862,7 +864,7 @@ When using `super`, be careful about the difference between `super` with no arguments and `super` whose arguments are omitted. The `super` whose arguments are omitted passes all the given parameter variables. -
+```TODO-lang
 class A
   def test( *args )
     p args
@@ -880,9 +882,9 @@ class B < A
 end
 
 B.new.test(1,2,3)
-
+``` -h4. Visibility +#### Visibility In Ruby, even when calling the same method, it can be or cannot be called depending on the location (meaning the @@ -908,7 +910,7 @@ Be careful. Usually we control visibility as shown below. -
+```TODO-lang
 class C
   public
   def a1() end   # becomes public
@@ -922,7 +924,7 @@ class C
   def c1() end   # becomes protected
   def c2() end   # becomes protected
 end
-
+``` Here `public`, `private` and `protected are method calls without parentheses. These aren't even reserved words. @@ -931,7 +933,7 @@ parentheses. These aren't even reserved words. the visibility of a particular method. But its mechanism is not interesting. We'll leave this out. -h4. Module functions +#### Module functions Given a module 'M'. If there are two methods with the exact same content @@ -945,12 +947,12 @@ then we call this a module function. It is not apparent why this should be useful. But let's look at the next example which is happily used. -
+```TODO-lang
 Math.sin(5)       # If used for a few times this is more convenient
 
 include Math
 sin(5)            # If used more often this is more practical
-
+``` It's important that both functions have the same content. With a different `self` but with the same code the behavior should @@ -958,7 +960,8 @@ still be the same. Instance variables become extremely difficult to use. Hence such method is very likely a method in which only procedures are written (like `sin`). That's why they are called module "functions". -h2. Iterators +Iterators +========= Ruby's iterators differ a bit from Java's or C++'s iterator classes or 'Iterator' design pattern. Precisely speaking, those iterators @@ -966,28 +969,28 @@ are called exterior iterators, Ruby's iterators are interior iterators. Regarding this, it's difficult to understand from the definition so let's explain it with a concrete example. -
+```TODO-lang
 arr = [0,2,4,6.8]
-
+``` This array is given and we want to access the elements in order. In C style we would write the following. -
+```TODO-lang
 i = 0
 while i < arr.length
   print arr[i]
   i += 1
 end
-
+``` Using an iterator we can write: -
+```TODO-lang
 arr.each do |item|
   print item
 end
-
+``` Everything from `each do` to `end` is the call to an iterator method. More precisely `each` is the iterator method and between @@ -1005,7 +1008,7 @@ to the cut out piece of code. We can also think the other way round. The other parts except `print item` are being cut out and enclosed into the `each` method. -
+```TODO-lang
 i = 0
 while i < arr.length
   print arr[i]
@@ -1015,9 +1018,9 @@ end
 arr.each do |item|
   print item
 end
-
+``` -h3. Comparison with higher order functions +### Comparison with higher order functions What comes closest in C to iterators are functions which receive function pointers, it means higher order functions. But there are two points in which iterators in Ruby @@ -1026,23 +1029,23 @@ and higher order functions in C differ. Firstly, Ruby iterators can only take one block. For instance we can't do the following. -
+```TODO-lang
 # Mistake. Several blocks cannot be passed.
 array_of_array.each do |i|
   ....
 end do |j|
   ....
 end
-
+``` Secondly, Ruby's blocks can share local variables with the code outside. -
+```TODO-lang
 lvar = 'ok'
 [0,1,2].each do |i|
   p lvar    # Can acces local variable outside the block.
 end
-
+``` That's where iterators are convenient. @@ -1051,17 +1054,17 @@ with the inside of the iterator method ( e.g. `each`). Putting it intuitively, only the variables in the place which looks of the source code continued are visible. -h3. Block Local Variables +### Block Local Variables Local variables which are assigned inside a block stay local to that block, it means they become block local variables. Let's check it out. -
+```TODO-lang
 [0].each do
   i = 0
   p i     # 0
 end
-
+``` For now, to create a block, we apply `each` on an array of length 1 (We can fully leave out the block parameter). @@ -1071,7 +1074,7 @@ This makes `i` block local. It is said block local, so it should not be able to access from the outside. Let's test it. -
+```TODO-lang
 % ruby -e '
 [0].each do
   i = 0
@@ -1080,7 +1083,7 @@ p i     # Here occurs an error.
 '
 -e:5: undefined local variable or method `i'
 for #<Object:0x40163a9c> (NameError)
-
+``` When we referenced a block local variable from outside the block, surely an error occured. Without a doubt it stayed local to the block. @@ -1088,7 +1091,7 @@ surely an error occured. Without a doubt it stayed local to the block. Iterators can also be nested repeatedly. Each time the new block creates another scope. -
+```TODO-lang
 lvar = 0
 [1].each do
   var1 = 1
@@ -1103,14 +1106,14 @@ lvar = 0
   # Here lvar, var1 can be seen
 end
 # Here only lvar can be seen
-
+``` There's one point which you have to keep in mind. Differing from nowadays' major languages Ruby's block local variables don't do shadowing. Shadowing means for instance in C that in the code below the two declared variables `i` are different. -
+```TODO-lang
 {
     int i = 3;
     printf("%d\n", i);         /* 3 */
@@ -1120,7 +1123,7 @@ variables `i` are different.
     }
     printf("%d\n", i);         /* 3 (元に戻った) */
 }
-
+``` Inside the block the @i@ inside overshadows the @i@ outside. That's why it's called shadowing. @@ -1128,7 +1131,7 @@ That's why it's called shadowing. But what happens with block local variables of Ruby where there's no shadowing. Let's look at this example. -
+```TODO-lang
 i = 0
 p i           # 0
 [0].each do
@@ -1136,7 +1139,7 @@ p i           # 0
   p i         # 1
 end
 p i           # 1 the change is preserved
-
+``` Even when we assign @i@ inside the block, if there is the same name outside, it would be used. @@ -1145,7 +1148,7 @@ changed. On this point there came many complains: "This is error prone. Please do shadowing." Each time there's nearly flaming but till now no conclusion was reached. -h3. The syntax of iterators +### The syntax of iterators There are some smaller topics left. @@ -1153,7 +1156,7 @@ First, there are two ways to write an iterator. One is the `do` ~ `end` as used above, the other one is the enclosing in braces. The two expressions below have exactly the same meaning. -
+```TODO-lang
 arr.each do |i|
   puts i
 end
@@ -1161,33 +1164,33 @@ end
 arr.each {|i|    # The author likes a four space indentation for
     puts i       # an iterator with braces.
 }
-
+``` But grammatically the precedence is different. The braces bind much stronger than `do`~`end`. -
+```TODO-lang
 m m do .... end    # m(m) do....end
 m m { .... }       # m(m() {....})
-
+``` And iterators are definitely methods, so there are also iterators that take arguments. -
+```TODO-lang
 re = /^\d/                 # regular expression to match a digit at the beginning of the line
 $stdin.grep(re) do |line|  # look repeatedly for this regular expression
   ....
 end
-
+``` -h3. `yield` +### `yield` Of course users can write their own iterators. Methods which have a `yield` in their definition text are iterators. Let's try to write an iterator with the same effect as `Array#each`: -
+```TODO-lang
 # adding the definition to the Array class
 class Array
   def my_each
@@ -1208,29 +1211,29 @@ end
 [0,1,2,3,4].my_each do |i|
   p i
 end
-
+``` @yield@ calls the block. At this point control is passed to the block, when the execution of the block finishes it returns back to the same location. Think about it like a characteristic function call. When the present method does not have a block a runtime error will occur. -
+```TODO-lang
 % ruby -e '[0,1,2].each'
 -e:1:in `each': no block given (LocalJumpError)
         from -e:1
-
+``` -h3. `Proc` +### `Proc` I said, that iterators are like cut out code which is passed as an argument. But we can even more directly make code to an object and carry it around. -
+```TODO-lang
 twice = Proc.new {|n| n * 2 }
 p twice.call(9)   # 18 will be printed
-
+``` In short, it is like a function. As might be expected from the fact it is created with @new@, the return value of @Proc.new@ is an instance @@ -1243,11 +1246,11 @@ which turns an iterator block into an object. Besides there is a function style method @lambda@ provided which has the same effect as @Proc.new@. Choose whatever suits you. -
+```TODO-lang
 twice = lambda {|n| n * 2 }
-
+``` -h4. Iterators and `Proc` +#### Iterators and `Proc` Why did we start talking all of a sudden about @Proc@? Because there is a deep relationship between iterators and @Proc@. @@ -1257,14 +1260,14 @@ That's why one can be transformed into the other. First, to turn an iterator block into a @Proc@ object one has to put an @&@ in front of the parameter name. -
+```TODO-lang
 def print_block( &block )
   p block
 end
 
 print_block() do end   # Shows something like 
 print_block()          # Without a block nil is printed
-
+``` With an @&@ in front of the argument name, the block is transformed to a @Proc@ object and assigned to the variable. If the method is not an @@ -1273,21 +1276,21 @@ iterator (there's no block attached) @nil@ is assigned. And in the other direction, if we want to pass a @Proc@ to an iterator we also use @&@. -
+```TODO-lang
 block = Proc.new {|i| p i }
 [0,1,2].each(&block)
-
+``` This code means exactly the same as the code below. -
+```TODO-lang
 [0,1,2].each {|i| p i }
-
+``` If we combine these two, we can delegate an iterator block to a method somewhere else. -
+```TODO-lang
 def each_item( &block )
   [0,1,2].each(&block)
 end
@@ -1295,9 +1298,10 @@ end
 each_item do |i|    # same as [0,1,2].each do |i|
   p i
 end
-
+``` -h2. Expressions +Expressions +=========== "Expressions" in Ruby are things with which we can create other expressions or statements by combining with the others. @@ -1307,13 +1311,13 @@ But literals and method calls are not always combinations of elements. On the contrary, "expressions", which I'm going to introduce, always consists of some elements. -h3. `if` +### `if` We probably do not need to explain the @if@ expression. If the conditional expression is true, the body is executed. As explained in Part 1, every object except @nil@ and @false@ is true in Ruby. -
+```TODO-lang
 if cond0 then
   ....
 elsif cond1 then
@@ -1323,14 +1327,14 @@ elsif cond2 then
 else
   ....
 end
-
+``` `elsif`/`else`-clauses can be omitted. Each `then` as well. But there are some finer requirements concerning @then@. For this kind of thing, looking at some examples is the best way to understand. The only thing I'll say here is that the code below is valid. -
+```TODO-lang
 # 1                                    # 4
 if cond then ..... end                 if cond
                                        then .... end
@@ -1340,37 +1344,37 @@ if cond; .... end                      # 5
 # 3                                    then
 if cond then; .... end                   ....
                                        end
-
+``` And in Ruby, `if` is an expression, so there is the value of the entire `if` expression. It is the value of the body where a condition expression is met. For example, if the condition of the first `if` is true, the value would be the one of its body. -
+```TODO-lang
 p(if true  then 1 else 2 end)   #=> 1
 p(if false then 1 else 2 end)   #=> 2
 p(if false then 1 elsif true then 2 else 3 end)   #=> 2
-
+``` If there's no match, or the matched clause is empty, the value would be @nil@. -
+```TODO-lang
 p(if false then 1 end)    #=> nil
 p(if true  then   end)    #=> nil
-
+``` -h3. `unless` +### `unless` An @if@ with a negated condition is an @unless@. The following two expressions have the same meaning. -
+```TODO-lang
 unless cond then          if not (cond) then
   ....                      ....
 end                       end
-
+``` @unless@ can also have attached @else@ clauses but any @elsif@ cannot be attached. @@ -1381,33 +1385,33 @@ Needless to say, @then@ can be omitted. clause. If there's no match or the matched clause is empty, the value would be @nil@. -h3. `and && or ||` +### `and && or ||` The most likely utilization of the @and@ is probably a boolean operation. For instance in the conditional expression of an @if@. -
+```TODO-lang
 if cond1 and cond2
   puts 'ok'
 end
-
+``` But as in Perl, `sh` or Lisp, it can also be used as a conditional branch expression. The two following expressions have the same meaning. -
+```TODO-lang
                                         if invalid?(key)
 invalid?(key) and return nil              return nil
                                         end
-
+``` @&&@ and @and@ have the same meaning. Different is the binding order. -
+```TODO-lang
 method arg0 &&  arg1    # method(arg0 && arg1)
 method arg0 and arg1    # method(arg0) and arg1
-
+``` Basically the symbolic operator creates an expression which can be an argument (`arg`). @@ -1420,49 +1424,49 @@ the right hand side will also be evaluated. On the other hand @or@ is the opposite of @and@. If the evaluation of the left hand side is false, the right hand side will also be evaluated. -
+```TODO-lang
 valid?(key) or return nil
-
+``` @or@ and @||@ have the same relationship as @&&@ and @and@. Only the precedence is different. -h3. The Conditional Operator +### The Conditional Operator There is a conditional operator similar to C: -
+```TODO-lang
 cond ? iftrue : iffalse
-
+``` The space between the symbols is important. If they bump together the following weirdness happens. -
+```TODO-lang
 cond?iftrue:iffalse   # cond?(iftrue(:iffalse))
-
+``` The value of the conditional operator is the value of the last executed expression. Either the value of the true side or the value of the false side. -h3. `while until` +### `while until` Here's a `while` expression. -
+```TODO-lang
 while cond do
   ....
 end
-
+``` This is the simplest loop syntax. As long as @cond@ is true the body is executed. The @do@ can be omitted. -
+```TODO-lang
 until io_ready?(id) do
   sleep 0.5
 end
-
+``` @until@ creates a loop whose condition definition is opposite. As long as the condition is false it is executed. @@ -1473,7 +1477,7 @@ Naturally there is also jump syntaxes to exit a loop. but @continue@ is @next@. Perhaps @next@ has come from Perl. -
+```TODO-lang
 i = 0
 while true
   if i > 10
@@ -1484,18 +1488,18 @@ while true
   end
   i += 1
 end
-
+``` And there is another Perlism: the @redo@. -
+```TODO-lang
 while cond
   # (A)
   ....
   redo
   ....
 end
-
+``` It will return to (A) and repeat from there. What differs from @next@ is it does not check the condition. @@ -1504,12 +1508,12 @@ I might come into the world top 100, if the amount of Ruby programs would be counted, but I haven't used @redo@ yet. It does not seem to be necessary after all because I've lived happily without it. -h3. `case` +### `case` A special form of the @if@ expression. It performs branching on a series of conditions. The following left and right expressions are identical in meaning. -
+```TODO-lang
 case value
 when cond1 then                if cond1 === value
   ....                           ....
@@ -1520,7 +1524,7 @@ when cond3, cond4 then         elsif cond3 === value or cond4 === value
 else                           else
   ....                           ....
 end                            end
-
+``` The threefold equals @===@ is, just like @==@, actually a method call. Notice that the receiver is the object on the left hand side. Concretely, @@ -1533,7 +1537,7 @@ Since `case` has many grammatical elements, to list them all would be tedious, thus we will not cover them in this book. -h3. Exceptions +### Exceptions This is a control structure which can pass over method boundaries and transmit errors. Readers who are acquainted with C++ or Java @@ -1543,9 +1547,9 @@ same. In Ruby exceptions come in the form of the function style method `raise`. `raise` is not a reserved word. -
+```TODO-lang
 raise ArgumentError, "wrong number of argument"
-
+``` In Ruby, exceptions are instances of the @Exception@ class and its subclasses. This form takes an exception class as its first argument @@ -1554,32 +1558,32 @@ an instance of @ArgumentError@ is created and "thrown". Exception object would ditch the part after the @raise@ and start to return upwards the method call stack. -
+```TODO-lang
 def raise_exception
   raise ArgumentError, "wrong number of argument"
   # the code after the exception will not be executed
   puts 'after raise'
 end
 raise_exception()
-
+``` If nothing blocks the exception it will move on and on and finally it will reach the top level. When there's no place to return any more, @ruby@ gives out a message and ends with a non-zero exit code. -
+```TODO-lang
 % ruby raise.rb
 raise.rb:2:in `raise_exception': wrong number of argument (ArgumentError)
         from raise.rb:7
-
+``` However an @exit@ would be sufficient for this, and for an exception there should be a way to set handlers. In Ruby, @begin@~@rescue@~@end@ is used for this. It resembles the @try@~@catch@ in C++ and Java. -
+```TODO-lang
 def raise_exception
   raise ArgumentError, "wrong number of argument"
 end
@@ -1590,7 +1594,7 @@ rescue ArgumentError => err then
   puts 'exception catched'
   p err
 end
-
+``` @rescue@ is a control structure which captures exceptions, it catches exception objects of the specified class and its subclasses. In the @@ -1599,24 +1603,24 @@ where @ArgumentError@ is targeted, so it matches this @rescue@. By @=>err@ the exception object will be assigned to the local variable @err@, after that the @rescue@ part is executed. -
+```TODO-lang
 % ruby rescue.rb
 exception catched
 #
-
+``` When an exception is rescued, it will go through the `rescue` and it will start to execute the subsequent code as if nothing happened, but we can also make it retry from the `begin`. To do so, `retry` is used. -
+```TODO-lang
 begin    # the place to return
   ....
 rescue ArgumentError => err then
   retry  # retry your life
 end
-
+``` We can omit the @=>err@ and the @then@ after @rescue@. We can also leave out the exception class. In this case, it means as the same as when the @@ -1626,14 +1630,14 @@ If we want to catch more exception classes, we can just write them in line. When we want to handle different errors differently, we can specify several `rescue` clauses. -
+```TODO-lang
 begin
   raise IOError, 'port not ready'
 rescue ArgumentError, TypeError
 rescue IOError
 rescue NameError
 end
-
+``` When written in this way, a `rescue` clause that matches the exception class is searched in order from the top. Only the matched clause will be executed. @@ -1642,7 +1646,7 @@ For instance, only the clause of @IOError@ will be executed in the above case. On the other hand, when there is an @else@ clause, it is executed only when there is no exception. -
+```TODO-lang
 begin
   nil    # Of course here will no error occur
 rescue ArgumentError
@@ -1650,19 +1654,19 @@ rescue ArgumentError
 else
   # This part will be executed
 end
-
+``` Moreover an @ensure@ clause will be executed in every case: when there is no exception, when there is an exception, rescued or not. -
+```TODO-lang
 begin
   f = File.open('/etc/passwd')
   # do stuff
 ensure   # this part will be executed anyway
   f.close
 end
-
+``` By the way, this @begin@ expression also has a value. The value of the whole @begin@~@end@ expression is the value of the part which was executed @@ -1671,18 +1675,18 @@ It means the last statement of the clauses aside from `ensure`. The reason why the @ensure@ is not counted is probably because @ensure@ is usually used for cleanup (thus it is not a main line). -h3. Variables and Constants +### Variables and Constants Referring a variable or a constant. The value is the object the variable points to. We already talked in too much detail about the various behaviors. -
+```TODO-lang
 lvar
 @ivar
 @@cvar
 CONST
 $gvar
-
+``` I want to add one more thing. Among the variables starting with @$@, @@ -1701,35 +1705,35 @@ the @$?@ to hold the status of a child process, the @$SAFE@ to represent the security level, they are all thread local. -h3. Assignment +### Assignment Variable assignments are all performed by `=`. All variables are typeless. What is saved is a reference to an object. As its implementation, it was a `VALUE` (pointer). -
+```TODO-lang
 var = 1
 obj = Object.new
 @ivar = 'string'
 @@cvar = ['array']
 PI = 3.1415926535
 $gvar = {'key' => 'value'}
-
+``` However, as mentioned earlier `obj.attr=val` is not an assignment but a method call. -h3. Self Assignment +### Self Assignment -
+```TODO-lang
 var += 1
-
+``` This syntax is also in C/C++/Java. In Ruby, -
+```TODO-lang
 var = var + 1
-
+``` it is a shortcut of this code. Differing from C, the Ruby @+@ is a method and thus part of the library. @@ -1741,7 +1745,7 @@ In Ruby @+=@ is always defined as an operation of the combination of @+@ and ass We can also combine self assignment and an attribute-access-flavor method. The result more looks like an attribute. -
+```TODO-lang
 class C
   def i() @i end          # A method definition can be written in one line.
   def i=(n) @i = n end
@@ -1751,7 +1755,7 @@ obj = C.new
 obj.i = 1
 obj.i += 2    # obj.i = obj.i + 2
 p obj.i       # 3
-
+``` If there is `+=` there might also be `++` but this is not the case. Why is that so? In Ruby assignment is dealt with on the language level. @@ -1766,15 +1770,15 @@ I am also in favor of @++@ but not as much as I can't do without, and I have not felt so much needs of @++@ in Ruby in the first place, so I've kept silent and decided to forget about it. -h3. `defined?` +### `defined?` @defined?@ is a syntax of a quite different color in Ruby. It tells whether an expression value is "defined" or not at runtime. -
+```TODO-lang
 var = 1
 defined?(var)   #=> true
-
+``` In other words it tells whether a value can be obtained from the expression received as its argument (is it okay to call it so?) when the expression is @@ -1785,7 +1789,8 @@ method call which raises an error in it. I would have loved to tell you more about @defined?@ but it will not appear again in this book. What a pity. -h2. Statements +Statements +========== A statement is what basically cannot be combined with the other syntaxes, in other words, they are lined vertically. @@ -1797,7 +1802,7 @@ However this is rarely recommended and isn't useful, you'd better regard them lightly in this way. Here we also skip about the value of each statement. -h3. The Ending of a statement +### The Ending of a statement Up to now we just said "For now one line's one statement". But Ruby's statement endings aren't that straightforward. @@ -1805,71 +1810,71 @@ But Ruby's statement endings aren't that straightforward. First a statement can be ended explicitly with a semicolon as in C. Of course then we can write two or more statements in one line. -
+```TODO-lang
 puts 'Hello, World!'; puts 'Hello, World once more!'
-
+``` On the other hand, when the expression apparently continues, such as just after opened parentheses, dyadic operators, or a comma, the statement continues automatically. -
+```TODO-lang
 # 1 + 3 * method(6, 7 + 8)
 1 +
   3 *
      method(
             6,
             7 + 8)
-
+``` But it's also totally no problem to use a backslash to explicitly indicate the continuation. -
+```TODO-lang
 p 1 + \
   2
-
+``` -h3. The Modifiers `if` and `unless` +### The Modifiers `if` and `unless` The `if` modifier is an irregular version of the normal `if` The programs on the left and right mean exactly the same. -
+```TODO-lang
 on_true() if cond                if cond
                                    on_true()
                                  end
-
+``` The `unless` is the negative version. Guard statements (statements which exclude exceptional conditions) can be conveniently written with it. -h3. The Modifiers `while` and `until` +### The Modifiers `while` and `until` `while` and `until` also have a back notation. -
+```TODO-lang
 process() while have_content?
 sleep(1) until ready?
-
+``` Combining this with `begin` and `end` gives a `do`-`while`-loop like in C. -
+```TODO-lang
 begin
   res = get_response(id)
 end while need_continue?(res)
-
+``` -h3. Class Definition +### Class Definition -
+```TODO-lang
 class C < SuperClass
   ....
 end
-
+``` Defines the class `C` which inherits from `SuperClass` @@ -1879,18 +1884,18 @@ become @self@ within the statement, arbitrary expressions can be written within. definitions can be nested. They form the foundation of Ruby execution image. -h3. Method Definition +### Method Definition -
+```TODO-lang
 def m(arg)
 end
-
+``` I've already written about method definition and won't add more. This section is put to make it clear that they also belong to statements. -h3. Singleton method definition +### Singleton method definition We already talked a lot about singleton methods in Part 1. They do not belong to classes but to objects, in fact, they belong @@ -1898,21 +1903,21 @@ to singleton classes. We define singleton methods by putting the receiver in front of the method name. Parameter declaration is done the same way like with ordinary methods. -
+```TODO-lang
 def obj.some_method
 end
 
 def obj.some_method2( arg1, arg2, darg = nil, *rest, &block )
 end
-
+``` -h3. Definition of Singleton methods +### Definition of Singleton methods -
+```TODO-lang
 class << obj
   ....
 end
-
+``` From the viewpoint of purposes, it is the statement to define some singleton methods in a bundle. @@ -1922,56 +1927,56 @@ executed. In all over the Ruby program, this is the only place where a singleton class is exposed. -
+```TODO-lang
 class << obj
   p self  #=> #>   # Singleton Class 「(obj)」
   def a() end   # def obj.a
   def b() end   # def obj.b
 end
-
+``` -h3. Multiple Assignment +### Multiple Assignment With a multiple assignment, several assignments can be done all at once. The following is the simplest case: -
+```TODO-lang
 a, b, c = 1, 2, 3
-
+``` It's exactly the same as the following. -
+```TODO-lang
 a = 1
 b = 2
 c = 3
-
+``` Just being concise is not interesting. In fact, it only starts to become fun once an array comes into the mix. -
+```TODO-lang
 a, b, c = [1, 2, 3]
-
+``` This also has the same result as the above. Furthermore, the right hand side does not need to be a grammatical list or a literal. It can also be a variable or a method call. -
+```TODO-lang
 tmp = [1, 2, 3]
 a, b, c = tmp
 ret1, ret2 = some_method()   # some_method might probably return several values
-
+``` Precisely speaking it is as follows. Here we'll assume @obj@ is (the object of) the value of the left hand side, -# `obj` if it is an array -# if its `to_ary` method is defined, it is used to convert `obj` to an array. -# `[obj]` +* `obj` if it is an array +* if its `to_ary` method is defined, it is used to convert `obj` to an array. +* `[obj]` Decide the right-hand side by following this procedure and perform assignments. @@ -1980,48 +1985,48 @@ are totally independent from each other. And it goes on, both the left and right hand side can be infinitely nested. -
+```TODO-lang
 a, (b, c, d) = [1, [2, 3, 4]]
 a, (b, (c, d)) = [1, [2, [3, 4]]]
 (a, b), (c, d) = [[1, 2], [3, 4]]
-
+``` As the result of the execution of this program, each line will be `a=1 b=2 c=3 d=4`. And it goes on. The left hand side can be index or parameter assignments. -
+```TODO-lang
 i = 0
 arr = []
 arr[i], arr[i+1], arr[i+2] = 0, 2, 4
 p arr    # [0, 2, 4]
 
 obj.attr0, obj.attr1, obj.attr2 = "a", "b", "c"
-
+``` And like with method parameters, @*@ can be used to receive in a bundle. -
+```TODO-lang
 first, *rest = 0, 1, 2, 3, 4
 p first  # 0
 p rest   # [1, 2, 3, 4]
-
+``` When all of them are used all at once, it's extremely confusing. -h4. Block parameter and multiple assignment +#### Block parameter and multiple assignment We brushed over block parameters when we were talking about iterators. But there is a deep relationship between them and multiple assignment. For instance in the following case. -
+```TODO-lang
 array.each do |i|
   ....
 end
-
+``` Every time when the block is called, the `yield`ed arguments are multi-assigned to `i`. @@ -2030,31 +2035,31 @@ But if there are two or more variables, it would a little more look like it. For instance, @Hash#each@ is an repeated operation on the pairs of keys and values, so usually we call it like this: -
+```TODO-lang
 hash.each do |key, value|
   ....
 end
-
+``` In this case, each array consisting of a key and a value is `yield`ed from the hash. Hence we can also do the following by using nested multiple assignment. -
+```TODO-lang
 # [[key,value],index] are yielded
 hash.each_with_index do |(key, value), index|
   ....
 end
-
+``` -h3. `alias` +### `alias` -
+```TODO-lang
 class C
   alias new orig
 end
-
+``` Defining another method `new` with the same body as the already defined method `orig`. `alias` are similar to hardlinks in a unix @@ -2064,13 +2069,13 @@ because the names themselves are independent of each other, even if one method name is overwritten by a subclass method, the other one still remains with the same behavior. -h3. `undef` +### `undef` -
+```TODO-lang
 class C
   undef method_name
 end
-
+``` Prohibits the calling of `C#method_name`. It's not just a simple revoking of the definition. If there even were a method in the @@ -2088,46 +2093,47 @@ is `Module#remove_method`. While defining a class, `self` refers to that class, we can call it as follows (Remember that `Class` is a subclass of `Module`.) -
+```TODO-lang
 class C
   remove_method(:method_name)
 end
-
+``` But even with a `remove_method` one cannot cancel the `undef`. It's because the sign put up by `undef` prohibits any kind of searches. ((errata: It can be redefined by using `def`)) -h2. Some more small topics +Some more small topics +====================== -h3. Comments +### Comments -
+```TODO-lang
 # examples of bad comments.
 1 + 1            # compute 1+1.
 alias my_id id   # my_id is an alias of id.
-
+``` From a `#` to the end of line is a comment. It doesn't have a meaning for the program. -h3. Embedded documents +### Embedded documents -
+```TODO-lang
 =begin
 This is an embedded document.
 It's so called because it is embedded in the program.
 Plain and simple.
 =end
-
+``` An embedded document stretches from an `=begin` outside a string at the beginning of a line to a `=end`. The interior can be arbitrary. The program ignores it as a mere comment. -h3. Multi-byte strings +### Multi-byte strings When the global variable @$KCODE@ is set to either @EUC@, @SJIS@ or @UTF8@, strings encoded in euc-jp, shift_jis, or utf8 respectively can be @@ -2139,12 +2145,12 @@ String literals, regular expressions and even operator names can contain multibyte characters. Hence it is possible to do something like this: -
+```TODO-lang
 def 表示( arg )
   puts arg
 end
 
 表示 'にほんご'
-
+``` But I really cannot recommend doing things like that. diff --git a/syntree.textile b/syntree.md similarity index 95% rename from syntree.textile rename to syntree.md index dfdfbc7..f9b87c0 100644 --- a/syntree.textile +++ b/syntree.md @@ -5,10 +5,11 @@ title: "Chapter 12: Syntax tree construction" h1(#chapter). Chapter 12: Syntax tree construction -h2. Node +Node +==== -h3. @NODE@ +### @NODE@ As I've already described, a Ruby program is first converted to a syntax tree. @@ -18,7 +19,7 @@ In @ruby@, all nodes are of type @NODE@.

▼ @NODE@

-
+```TODO-lang
  128  typedef struct RNode {
  129      unsigned long flags;
  130      char *nd_file;
@@ -46,7 +47,7 @@ In @ruby@, all nodes are of type @NODE@.
  152  } NODE;
 
 (node.h)
-
+``` Although you might be able to infer from the struct name @RNode@, nodes are Ruby objects. @@ -76,7 +77,7 @@ And, in @node.h@, the macros to access each union member are available.

▼ the macros to access @NODE@

-
+```TODO-lang
  166  #define nd_head  u1.node
  167  #define nd_alen  u2.argc
  168  #define nd_next  u3.node
@@ -90,16 +91,16 @@ And, in @node.h@, the macros to access each union member are available.
                  :
 
 (node.h)
-
+``` For example, these are used as follows: -
+```TODO-lang
 NODE *head, *tail;
 head->nd_next = tail;    /* head->u3.node = tail */
-
+``` In the source code, it's almost certain that these macros are used. @@ -121,7 +122,7 @@ and conversely we can determine the node types from the macros. -h3. Node Type +### Node Type I said that in the @flags@ of a @NODE@ struct its node type is stored. @@ -131,24 +132,24 @@ A node type can be set by @nd_set_type()@ and obtained by @nd_type()@.

▼ @nd_type nd_set_type@

-
+```TODO-lang
  156  #define nd_type(n) (((RNODE(n))->flags>>FL_USHIFT)&0xff)
  157  #define nd_set_type(n,t) \
  158      RNODE(n)->flags = ((RNODE(n)->flags & ~FL_UMASK) \
                              | (((t)<
+```
 
 
 

▼ @FL_USHIFT FL_UMASK@

-
+```TODO-lang
  418  #define FL_USHIFT    11
  429  #define FL_UMASK  (0xff<
+```
 
 
 It won't be so much trouble if we'll keep focus on around @nd_type@.
@@ -167,7 +168,7 @@ the @nodetype()@ function is also available.
 
 

▼ @nodetype@

-
+```TODO-lang
 4247  static enum node_type
 4248  nodetype(node)                  /* for debug */
 4249      NODE *node;
@@ -176,12 +177,12 @@ the @nodetype()@ function is also available.
 4252  }
 
 (parse.y)
-
+``` -h3. File Name and Line Number +### File Name and Line Number The @nd_file@ of a @NODE@ holds (the pointer to) the name of the file where the @@ -193,7 +194,7 @@ could not be found around here. Actually, the line number is being embedded to

▼ @nd_line nd_set_line@

-
+```TODO-lang
  160  #define NODE_LSHIFT (FL_USHIFT+8)
  161  #define NODE_LMASK  (((long)1<<(sizeof(NODE*)*CHAR_BIT-NODE_LSHIFT))-1)
  162  #define nd_line(n) \
@@ -203,7 +204,7 @@ could not be found around here. Actually, the line number is being embedded to
                              | (((l)&NODE_LMASK) << NODE_LSHIFT))
 
 (node.h)
-
+``` @nd_set_line()@ is fairly spectacular. @@ -224,9 +225,9 @@ The next thing is @NODE_LMASK@. -
+```TODO-lang
 sizeof(NODE*) * CHAR_BIT - NODE_LSHIFT
-
+``` This is the number of the rest of the bits. @@ -234,9 +235,9 @@ Let's assume it is @restbits@. This makes the code a lot simpler. -
+```TODO-lang
 #define NODE_LMASK  (((long)1 << restbits) - 1)
-
+``` Fig.2 shows what the above code seems to be doing. Note that a borrow occurs @@ -255,9 +256,9 @@ Now, let's look at @nd_line()@ again. -
+```TODO-lang
 (RNODE(n)->flags >> NODE_LSHIFT) & NODE_LMASK
-
+``` By the right shift, the unused space is shifted to the LSB. The bitwise AND @@ -276,12 +277,12 @@ the line numbers should wrongly be displayed. Let's try. -
+```TODO-lang
 File.open('overflow.rb', 'w') {|f|
     10000.times { f.puts }
     f.puts 'raise'
 }
-
+``` With my 686 machine, @ruby overflow.rb@ properly displayed 1809 as a line number. @@ -291,7 +292,7 @@ bigger file in order to successfully fail. -h3. @rb_node_newnode()@ +### `rb_node_newnode()` Lastly let's look at the function @rb_node_newnode()@ that creates a node. @@ -299,7 +300,7 @@ Lastly let's look at the function @rb_node_newnode()@ that creates a node.

▼ @rb_node_newnode()@

-
+```TODO-lang
 4228  NODE*
 4229  rb_node_newnode(type, a0, a1, a2)
 4230      enum node_type type;
@@ -320,7 +321,7 @@ Lastly let's look at the function @rb_node_newnode()@ that creates a node.
 4245  }
 
 (parse.y)
-
+``` We've seen @rb_newobj()@ in the Chapter 5: Garbage collection. It is the function to get a @@ -351,7 +352,8 @@ a struct type that has the above seven members. -h2. Syntax Tree Construction +Syntax Tree Construction +======================== The role of the parser is to convert the source code that is a byte sequence to a @@ -362,7 +364,7 @@ In this section, we'll look at the construction process of that syntax tree. -h3. @YYSTYPE@ +### `YYSTYPE` Essentially this chapter is about actions, @@ -372,7 +374,7 @@ Let's look at the @%union@ of @ruby@ first.

▼ @%union@ declaration

-
+```TODO-lang
  170  %union {
  171      NODE *node;
  172      ID id;
@@ -381,7 +383,7 @@ Let's look at the @%union@ of @ruby@ first.
  175  }
 
 (parse.y)
-
+``` @struct RVarmap@ is a struct used by the evaluator and holds a block local variable. @@ -390,7 +392,7 @@ You can tell the rest. The most used one is of course @node@. -h3. Landscape with Syntax Trees +### Landscape with Syntax Trees I mentioned that looking at the fact first is a theory of code reading. @@ -401,7 +403,7 @@ we should start with looking at the answer (the syntax tree). It's also nice using debuggers to observe every time, but you can visualize the syntax tree more handily by using the tool @nodedump@ contained in the attached CD-ROM, -This tool is originally the NodeDump made by "Pragmatic Programmers":http://www.pragmaticprogrammers.com +This tool is originally the NodeDump made by [Pragmatic Programmers](http://www.pragmaticprogrammers.com) and remodeled for this book. The original version shows quite explanatory output, but this remodeled version deeply and directly displays the appearance of the @@ -412,7 +414,7 @@ For example, in order to dump the simple expression @m(a)@, you can do as follow -
+```TODO-lang
 % ruby -rnodedump -e 'm(a)'
 NODE_NEWLINE
 nd_file = "-e"
@@ -427,7 +429,7 @@ nd_next:
             NODE_VCALL
             nd_mid = 9625 (a)
         nd_next = (null)
-
+``` The @-r@ option is used to specify the library to be loaded, @@ -477,7 +479,7 @@ tree in other words. -h3. Leaf +### Leaf First, let's start with the edges that are the leaves of the syntax tree. @@ -486,11 +488,11 @@ belong to @primary@ and are particularly simple even among the @primary@ rules. -
+```TODO-lang
 % ruby -rnodedump-short -e '1'
 NODE_LIT
 nd_lit = 1:Fixnum
-
+``` 1 as a numeric value. There's not any twist. However, notice that what is @@ -498,11 +500,11 @@ stored in the node is not 1 of C but 1 of Ruby (1 of @Fixnum@). This is because -
+```TODO-lang
 % ruby -rnodedump-short -e ':sym'
 NODE_LIT
 nd_lit = 9617:Symbol
-
+``` This way, @Symbol@ is represented by the same @NODE_LIT@ when it becomes a syntax tree. @@ -516,11 +518,11 @@ designing it so that it becomes convenient when executing is the right thing to -
+```TODO-lang
 % ruby -rnodedump-short -e '"a"'
 NODE_STR
 nd_lit = "a":String
-
+``` A string. This is also a Ruby string. @@ -528,7 +530,7 @@ String literals are copied when actually used. -
+```TODO-lang
 % ruby -rnodedump -e '[0,1]'
 NODE_NEWLINE
 nd_file = "-e"
@@ -546,7 +548,7 @@ nd_next:
             NODE_LIT
             nd_lit = 1:Fixnum
         nd_next = (null)
-
+``` Array. I can't say this is a leaf, but let's allow this to be here because it's @@ -557,14 +559,14 @@ you will understand after finishing to read this section. -h3. Branch +### Branch Next, we'll focus on "combinations" that are branches. @if@ will be taken as an example. -h4. @if@ +#### `if` I feel like @if@ is always used as an example, that's because its structure is @@ -578,18 +580,18 @@ For example, let's convert this code to a syntax tree.

▼The Source Program

-
+```TODO-lang
 if true
   'true expr'
 else
   'false expr'
 end
-
+```

▼Its syntax tree expression

-
+```TODO-lang
 NODE_IF
 nd_cond:
     NODE_TRUE
@@ -599,7 +601,7 @@ nd_body:
 nd_else:
     NODE_STR
     nd_lit = "false expr":String
-
+``` Here, the previously described @nodedump-short@ is used, so @NODE_NEWLINE@ @@ -613,7 +615,7 @@ Then, let's look at the code to build this.

▼ @if@ rule

-
+```TODO-lang
 1373                  | kIF expr_value then
 1374                    compstmt
 1375                    if_tail
@@ -624,7 +626,7 @@ Then, let's look at the code to build this.
 1380                      }
 
 (parse.y)
-
+``` It seems that @NEW_IF()@ is the macro to create @NODE_IF@. Among the values of @@ -634,11 +636,11 @@ of the rule and @$n@ are: -
+```TODO-lang
 kIF    expr_value  then  compstmt  if_tail  kEND
  $1          $2      $3        $4       $5    $6
 NEW_IF(expr_value,       compstmt, if_tail)
-
+``` this way. In other words, @expr_value@ is the condition expression, @compstmt@ @@ -652,11 +654,11 @@ are defined @node.h@. Let's look at @NEW_IF()@.

▼ @NEW_IF()@

-
+```TODO-lang
  243  #define NEW_IF(c,t,e) rb_node_newnode(NODE_IF,c,t,e)
 
 (node.h)
-
+``` As for the parameters, @@ -678,9 +680,9 @@ untouched. Therefore, it needs to be corrected by @fixpos()@. -
+```TODO-lang
 fixpos(dest, src)
-
+``` This way, the line number of the node @dest@ is set to the one of the node @src@. @@ -691,7 +693,7 @@ of the whole @if@ expression. -h4. @elsif@ +#### @elsif@ Subsequently, let's look at the rule of @if_tail@. @@ -699,7 +701,7 @@ Subsequently, let's look at the rule of @if_tail@.

▼ @if_tail@

-
+```TODO-lang
 1543  if_tail         : opt_else
 1544                  | kELSIF expr_value then
 1545                    compstmt
@@ -716,7 +718,7 @@ Subsequently, let's look at the rule of @if_tail@.
 1557                      }
 
 (parse.y)
-
+``` First, this rule expresses "a list ends with @opt_else@ after zero or more @@ -726,13 +728,13 @@ understand this by extracting arbitrary times. -
+```TODO-lang
 if_tail: kELSIF .... if_tail
 if_tail: kELSIF .... kELSIF .... if_tail
 if_tail: kELSIF .... kELSIF .... kELSIF .... if_tail
 if_tail: kELSIF .... kELSIF .... kELSIF .... opt_else
 if_tail: kELSIF .... kELSIF .... kELSIF .... kELSE compstmt
-
+``` Next, let's focus on the actions, surprisingly, @elsif@ uses the same @NEW_IF()@ as @if@. @@ -740,7 +742,7 @@ It means, the below two programs will lose the difference after they become synt -
+```TODO-lang
 if cond1                  if cond1
   body1                     body1
 elsif cond2               else
@@ -754,7 +756,7 @@ end                           else
                               end
                             end
                           end
-
+``` Come to think of it, in C language and such, there's no distinction between @@ -779,7 +781,7 @@ and so on. These pairs also become equal to each other. -h4. Left Recursive and Right Recursive +#### Left Recursive and Right Recursive By the way, the symbol of a list was always written at the left side when expressing a list @@ -788,10 +790,10 @@ I'll show only the crucial part again. -
+```TODO-lang
 if_tail: opt_else
        | kELSIF ... if_tail
-
+``` Surely, it is opposite of the previous examples. @if_tail@ which is the symbol @@ -802,10 +804,10 @@ In fact, there's another established way of expressing lists, -
+```TODO-lang
 list: END_ITEM
     | ITEM list
-
+``` when you write in this way, it becomes the list that contains continuous zero @@ -857,7 +859,7 @@ writing a book of @yacc@. -h3. Trunk +### Trunk Leaf, branch, and finally, it's trunk. @@ -866,11 +868,11 @@ Let's look at how the list of statements are joined.

▼The Source Program

-
+```TODO-lang
 7
 8
 9
-
+``` The dump of the corresponding syntax tree is shown below. @@ -879,7 +881,7 @@ This is not @nodedump-short@ but in the perfect form.

▼Its Syntax Tree

-
+```TODO-lang
 NODE_BLOCK
 nd_head:
     NODE_NEWLINE
@@ -907,7 +909,7 @@ nd_next:
                 NODE_LIT
                 nd_lit = 9:Fixnum
         nd_next = (null)
-
+``` We can see the list of @NODE_BLOCK@ is created and @NODE_NEWLINE@ are attached @@ -927,7 +929,7 @@ Let's also see the code.

▼ @stmts@

-
+```TODO-lang
  354  stmts           : none
  355                  | stmt
  356                      {
@@ -939,7 +941,7 @@ Let's also see the code.
  362                      }
 
 (parse.y)
-
+``` @newline_node()@ caps @NODE_NEWLINE@, @block_append()@ appends it to the list. @@ -949,7 +951,7 @@ Let's look at the content only of the @block_append()@. -h4. @block_append()@ +#### @block_append()@ In this function, the error checks are in the very middle and obstructive. @@ -958,7 +960,7 @@ Thus I'll show the code without that part.

▼ @block_append()@ (omitted)

-
+```TODO-lang
 4285  static NODE*
 4286  block_append(head, tail)
 4287      NODE *head, *tail;
@@ -990,7 +992,7 @@ Thus I'll show the code without that part.
 4332  }
 
 (parse.y)
-
+``` According to the previous syntax tree dump, @NEW_BLOCK@ was a linked list that uses @nd_next@. @@ -1013,7 +1015,7 @@ Fig.6: Appending is easy. -h3. The two types of lists +### The two types of lists Now, I've explained the outline so far. @@ -1055,7 +1057,8 @@ I'd like you to recall this and think "Oh, this uses the length". -h2. Semantic Analysis +Semantic Analysis +================= As I briefly mentioned at the beginning of Part 2, there are two types of analysis @@ -1066,7 +1069,7 @@ analysis inside actions. -h3. Errors inside actions +### Errors inside actions What does the semantic analysis precisely mean? @@ -1127,12 +1130,12 @@ Comparing to it, the current -
+```TODO-lang
 % ruby -e 'self = 1'
 -e:1: Can't change the value of self
 self = 1
       ^
-
+``` this error is much more friendly. @@ -1160,9 +1163,9 @@ Therefore, for example, the next expression is odd, -
+```TODO-lang
 i = return(1)
-
+``` Since this kind of expressions are clearly due to misunderstanding or simple mistakes, @@ -1170,7 +1173,7 @@ it's better to reject when compiling. Next, we'll look at @value_expr@ which is one of the functions to check if it takes a value. -h3. @value_expr()@ +### @value_expr()@ @value_expr()@ is the function to check if it is an @expr@ that has a value. @@ -1178,7 +1181,7 @@ h3. @value_expr()@

▼ @value_expr()@

-
+```TODO-lang
 4754  static int
 4755  value_expr(node)
 4756      NODE *node;
@@ -1235,10 +1238,10 @@ h3. @value_expr()@
 4807  }
 
 (parse.y)
-
+``` -h4. Algorithm +#### Algorithm Summary: It sequentially checks the nodes of the tree, if it hits "an expression certainly not having its value", it means the tree does not have any value. @@ -1254,7 +1257,7 @@ Here:

▼ check the value of @arg@ by using @value_expr()@

-
+```TODO-lang
 1055  arg_value       : arg
 1056                      {
 1057                          value_expr($1);
@@ -1262,7 +1265,7 @@ Here:
 1059                      }
 
 (parse.y)
-
+``` Inside of this argument @$1@, there can also be other nesting method calls again. @@ -1283,7 +1286,7 @@ For example, in the next case: -
+```TODO-lang
 def method
   if true
     return 1
@@ -1292,14 +1295,14 @@ def method
   end
   5
 end
-
+``` This @if@ statement does not need a value.
But in the next case, its value is necessary. -
+```TODO-lang
 def method( arg )
   tmp = if arg
         then 3
@@ -1307,7 +1310,7 @@ def method( arg )
         end
   tmp * tmp / 3.5
 end
-
+``` So, in this case, the @if@ statement must be checked when checking the entire @@ -1317,7 +1320,7 @@ statement of @value_expr()@. -h4. Removing Tail Recursion +#### Removing Tail Recursion By the way, when looking over the whole @value_expr@, we can see that there's @@ -1325,7 +1328,7 @@ the following pattern appears frequently: -
+```TODO-lang
 while (node) {
     switch (nd_type(node)) {
       case NODE_XXXX:
@@ -1335,16 +1338,16 @@ while (node) {
          :
     }
 }
-
+``` This expression will also carry the same meaning after being modified to the below: -
+```TODO-lang
 return value_expr(node->nd_xxxx)
-
+``` A code like this which does a recursive call just before @return@ is called a @@ -1360,11 +1363,11 @@ For example, take a look at the @NODE_IF@ of @value_expr()@, -
+```TODO-lang
 if (!value_expr(node->nd_body)) return Qfalse;
 node = node->nd_else;
 break;
-
+``` As shown above, the first time is a recursive call. @@ -1372,9 +1375,9 @@ Rewriting this to the form of using @return@, -
+```TODO-lang
 return value_expr(node->nd_body) && value_expr(node->nd_else);
-
+``` If the left @value_expr()@ is false, the right @value_expr()@ is also executed. @@ -1385,7 +1388,7 @@ Hence, it can't be extracted to @goto@. -h3. The whole picture of the value check +### The whole picture of the value check As for value checks, we won't read the functions further. @@ -1406,10 +1409,11 @@ Fig.7: the call graph of the value check functions -h2. Local Variables +Local Variables +=============== -h3. Local Variable Definitions +### Local Variable Definitions The variable definitions in Ruby are really various. @@ -1426,10 +1430,10 @@ For example, as follows: -
+```TODO-lang
 lvar = nil
 p lvar      # being defined
-
+``` In this case, as the assignment to @lvar@ is written at the first line, @@ -1438,11 +1442,11 @@ When it is undefined, it ends up with a runtime exception @NameError@ as follows -
+```TODO-lang
 % ruby lvar.rb
 lvar.rb:1: undefined local variable or method `lvar'
 for # (NameError)
-
+``` Why does it say @"local variable or method"@? @@ -1458,12 +1462,12 @@ though it was not assigned. The initial value of a defined variable is nil. -
+```TODO-lang
 if false
   lvar = "this assigment will never be executed"
 end
 p lvar   # shows nil
-
+``` Moreover, since it is defined "when" it "appears", the definition has to be @@ -1472,10 +1476,10 @@ For example, in the next case, it is not defined. -
+```TODO-lang
 p lvar       # not defined !
 lvar = nil   # although appearing here ...
-
+``` Be careful about the point of "in the symbol sequence". @@ -1487,9 +1491,9 @@ Therefore, this produces @NameError@. -
+```TODO-lang
 p(lvar) if lvar = true
-
+``` What we've learned by now is that the local variables are extremely influenced @@ -1502,7 +1506,7 @@ And in fact, it is true. In @ruby@, the parser defines local variables. -h3. Block Local Variables +### Block Local Variables The local variables newly defined in an iterator block are called block local @@ -1514,7 +1518,7 @@ We'll look at how is the difference from now on. -h3. The data structure +### The data structure We'll start with the local variable table @struct local_vars@. @@ -1522,7 +1526,7 @@ We'll start with the local variable table @struct local_vars@.

▼ @struct local_vars@

-
+```TODO-lang
 5174  static struct local_vars {
 5175      ID *tbl;                    /* the table of local variable names */
 5176      int nofree;                 /* whether it is used from outside */
@@ -1533,7 +1537,7 @@ We'll start with the local variable table @struct local_vars@.
 5181  } *lvtbl;
 
 (parse.y)
-
+``` The member name @prev@ indicates that the @struct local_vars@ is a @@ -1549,7 +1553,7 @@ This is used to store the block local variables.

▼ @struct RVarmap@

-
+```TODO-lang
   52  struct RVarmap {
   53      struct RBasic super;
   54      ID id;                  /* the variable name */
@@ -1558,7 +1562,7 @@ This is used to store the block local variables.
   57  };
 
 (env.h)
-
+``` @@ -1579,7 +1583,7 @@ Fig.8: The image of local variable tables at runtime -h3. Local Variable Scope +### Local Variable Scope When looking over the list of function names of @parse.y@, @@ -1591,7 +1595,7 @@ So first, let's find out the places where using these functions.

▼ @local_push() local_pop()@ used examples

-
+```TODO-lang
 1475                  | kDEF fname
 1476                      {
 1477                          $$ = cur_mid;
@@ -1615,7 +1619,7 @@ So first, let's find out the places where using these functions.
 1493                      }
 
 (parse.y)
-
+``` At @def@, I could find the place where it is used. It can also be found in class @@ -1633,11 +1637,11 @@ Moreover, I also searched @local_cnt()@.

▼ @NEW_LASGN()@

-
+```TODO-lang
  269  #define NEW_LASGN(v,val) rb_node_newnode(NODE_LASGN,v,val,local_cnt(v))
 
 (node.h)
-
+``` This is found in @node.h@. Even though there are also the places where using in @parse.y@, @@ -1665,12 +1669,12 @@ Fig.9: the flow of the local variable management Then, let's look at the content of the function. -h3. @push@ and @pop@ +### @push@ and @pop@

▼ @local_push()@

-
+```TODO-lang
 5183  static void
 5184  local_push(top)
 5185      int top;
@@ -1693,7 +1697,7 @@ h3. @push@ and @pop@
 5202  }
 
 (parse.y)
-
+``` As we expected, it seems that @struct local_vars@ is used as a stack. @@ -1706,7 +1710,7 @@ Subsequently, we'll look at @local_pop()@ and @local_tbl()@ at the same time.

▼ @local_tbl local_pop@

-
+```TODO-lang
 5218  static ID*
 5219  local_tbl()
 5220  {
@@ -1729,7 +1733,7 @@ Subsequently, we'll look at @local_pop()@ and @local_tbl()@ at the same time.
 5216  }
 
 (parse.y)
-
+``` I'd like you to look at @local_tbl()@. @@ -1757,7 +1761,7 @@ at the index 0 of the @lvtbl->tbl@. -h3. Adding variables +### Adding variables The function (which seems) to add a local variable is @local_cnt()@. @@ -1765,7 +1769,7 @@ The function (which seems) to add a local variable is @local_cnt()@.

▼ @local_cnt()@

-
+```TODO-lang
 5246  static int
 5247  local_cnt(id)
 5248      ID id;
@@ -1781,7 +1785,7 @@ The function (which seems) to add a local variable is @local_cnt()@.
 5258  }
 
 (parse.y)
-
+``` This scans @lvtbl->tbl@ and searches for what is equal to @id@. @@ -1824,7 +1828,7 @@ It is shown below, let's make sure.

▼ @local_append()@

-
+```TODO-lang
 5225  static int
 5226  local_append(id)
 5227      ID id;
@@ -1847,7 +1851,7 @@ It is shown below, let's make sure.
 5244  }
 
 (parse.y)
-
+``` It seems definitely true. @lvtbl->tbl@ is an array of the local variable names, @@ -1875,7 +1879,7 @@ thus it's necessary that the spaces are always allocated. -h3. Summary of local variables +### Summary of local variables Since the description of local variables were complex in various ways, @@ -1903,7 +1907,7 @@ Fig.11: correspondences between local variable names and the return values -h3. Block Local Variables +### Block Local Variables The rest is @dyna_vars@ which is a member of @struct local_vars@. @@ -1917,7 +1921,7 @@ Moreover, here is the place where these are used.

▼ an example using @dyna_push dyna_pop@

-
+```TODO-lang
 1651  brace_block     : '{'
 1652                      {
 1653                          $$ = dyna_push();
@@ -1931,7 +1935,7 @@ Moreover, here is the place where these are used.
 1661                      }
 
 (parse.y)
-
+``` @push@ at the beginning of an iterator block, @pop@ at the end. @@ -1942,7 +1946,7 @@ Now, we are going to look at the functions.

▼ @dyna_push()@

-
+```TODO-lang
 5331  static struct RVarmap*
 5332  dyna_push()
 5333  {
@@ -1954,7 +1958,7 @@ Now, we are going to look at the functions.
 5339  }
 
 (parse.y)
-
+``` Increasing @lvtbl->dlev@ seems the mark indicates the existence of the block @@ -1964,7 +1968,7 @@ Meanwhile, @rb_dvar_push()@ is ...

▼ @rb_dvar_push()@

-
+```TODO-lang
  691  void
  692  rb_dvar_push(id, value)
  693      ID id;
@@ -1974,7 +1978,7 @@ Meanwhile, @rb_dvar_push()@ is ...
  697  }
 
 (eval.c)
-
+``` It creates a @struct RVarmap@ that has the variable name @id@ and the value @@ -1998,7 +2002,7 @@ Next, @dyna_pop()@.

▼ @dyna_pop()@

-
+```TODO-lang
 5341  static void
 5342  dyna_pop(vars)
 5343      struct RVarmap* vars;
@@ -2008,7 +2012,7 @@ Next, @dyna_pop()@.
 5347  }
 
 (parse.y)
-
+``` By reducing @lvtbl->dlev@, it writes down the fact that the block local @@ -2024,7 +2028,7 @@ So, I did plenty of @grep@ with @dvar@ and @dyna@, and this code was found.

▼ @assignable()@ (partial)

-
+```TODO-lang
 4599  static NODE*
 4600  assignable(id, val)
 4601      ID id;
@@ -2035,7 +2039,7 @@ So, I did plenty of @grep@ with @dvar@ and @dyna@, and this code was found.
 4635              return NEW_DASGN_CURR(id, val);
 
 (parse.y)
-
+``` @assignable()@ is the function to create a node relates to assignments, @@ -2047,7 +2051,7 @@ by using @rb_dvar_push()@ that we've just seen. -h3. @ruby_dyna_vars@ in the parser +### @ruby_dyna_vars@ in the parser Now, taking the above all into considerations, let's imagine the appearance of @@ -2066,12 +2070,12 @@ I'd like you to focus on this part: -
+```TODO-lang
 $$ = dyna_push();    /* what assigned into $$ is ... */
         :
         :
 dyna_pop($2);        /* …… appears at $2 */
-
+``` @dyna_push()@ returns the @ruby_dyna_vars@ at the moment. @@ -2082,7 +2086,7 @@ Therefore, when parsing the following program, -
+```TODO-lang
 iter {
     a = nil
     iter {
@@ -2099,7 +2103,7 @@ iter {
     }
     # nesting level 1
 }
-
+``` Fig.12 shows the @ruby_dyna_vars@ in this situation. diff --git a/thread.textile b/thread.md similarity index 96% rename from thread.textile rename to thread.md index a65125b..61fb37a 100644 --- a/thread.textile +++ b/thread.md @@ -2,12 +2,14 @@ layout: default title: "Chapter 19: Threads" --- -h1. Chapter 19: Threads +Chapter 19: Threads +------------------- -h2. Outline +Outline +======= -h3. Ruby Interface +### Ruby Interface Come to think of it, I feel I have not introduced an actual code to use Ruby threads. @@ -15,7 +17,7 @@ This is not so special, but here I'll introduce it just in case. -
+```TODO-lang
 Thread.fork {
     while true
       puts 'forked thread'
@@ -24,7 +26,7 @@ Thread.fork {
 while true
   puts 'main thread'
 end
-
+``` When executing this program, @@ -49,7 +51,7 @@ and the below APIs can be used to control a thread itself. -h3. `ruby` Thread +### `ruby` Thread Threads are supposed to "run all together", @@ -75,7 +77,7 @@ limited to one. -h3. Is it preemptive? +### Is it preemptive? I'll describe the traits of `ruby` threads in more detail. @@ -130,7 +132,7 @@ Therefore, non-preemptive at C level is a reasonable choice for `ruby`. -h3. Management System +### Management System We've understood `ruby` thread is non-preemptive at C level. @@ -144,7 +146,7 @@ Let's look at the variables and the data types to manage threads.

▼ the structure to manage threads

-
+```TODO-lang
  864  typedef struct thread * rb_thread_t;
  865  static rb_thread_t curr_thread = 0;
  866  static rb_thread_t main_thread;
@@ -153,7 +155,7 @@ Let's look at the variables and the data types to manage threads.
 7302      struct thread *next, *prev;
 
 (eval.c)
-
+``` @@ -189,7 +191,7 @@ Only by this, we can run all threads equally to some extent. -h3. What does switching threads mean? +### What does switching threads mean? By the way, what is a thread in the first place? @@ -218,7 +220,7 @@ Or, it is called "context-switch". -h3. The way of context-switching +### The way of context-switching The rest talk is how to switch contexts. @@ -232,7 +234,7 @@ The spaces for both purposes are respectively prepared in `rb_thread_t`.

▼ `struct thread` (partial)

-
+```TODO-lang
 7301  struct thread {
 7302      struct thread *next, *prev;
 7303      jmp_buf context;
@@ -260,7 +262,7 @@ The spaces for both purposes are respectively prepared in `rb_thread_t`.
 7335      int safe;                   /* ruby_safe_level */
 
 (eval.c)
-
+``` As shown above, there are the members that seem to correspond to `ruby_frame` @@ -293,19 +295,19 @@ The place to store the stack properly exists in `struct thread`.

▼ `struct thread` (partial)

-
+```TODO-lang
 7310      int   stk_len;      /* the stack length */
 7311      int   stk_max;      /* the size of memory allocated for stk_ptr */
 7312      VALUE*stk_ptr;      /* the copy of the stack */
 7313      VALUE*stk_pos;      /* the position of the stack */
 
 (eval.c)
-
+``` -h3. How the explanation goes +### How the explanation goes So far, I've talked about various things, but the important points can be @@ -324,14 +326,15 @@ respectively. -h2. Trigger +Trigger +======= To begin with, it's the first point, when to switch threads. In other words, what is the cause of switching threads. -h3. Waiting I/O +### Waiting I/O For example, when trying to read in something by calling `IO#gets` or `IO#read`, @@ -343,7 +346,7 @@ Below is the interface of `getc`.

▼ `rb_getc()`

-
+```TODO-lang
 1185  int
 1186  rb_getc(f)
 1187      FILE *f;
@@ -361,7 +364,7 @@ Below is the interface of `getc`.
 1199  }
 
 (io.c)
-
+``` `READ_DATA_PENDING(f)` is a macro to check if the content of the buffer of the @@ -379,7 +382,7 @@ What is it? Let's see the inside of `rb_thread_wait_fd()`.

▼ `rb_thread_wait_fd()`

-
+```TODO-lang
 8047  void
 8048  rb_thread_wait_fd(fd)
 8049      int fd;
@@ -395,7 +398,7 @@ What is it? Let's see the inside of `rb_thread_wait_fd()`.
 8059  }
 
 (eval.c)
-
+``` There's `rb_thread_schedule()` at the last line. This function is the "direct cause". @@ -421,7 +424,7 @@ respectively. -h3. Waiting the other thread +### Waiting the other thread After understanding threads are switched at the timing of `rb_thread_schedule()`, @@ -432,7 +435,7 @@ Then by scanning, I found it in the function named `rb_thread_join()`.

▼ `rb_thread_join()` (partial)

-
+```TODO-lang
 8227  static int
 8228  rb_thread_join(th, limit)
 8229      rb_thread_t th;
@@ -447,7 +450,7 @@ Then by scanning, I found it in the function named `rb_thread_join()`.
 8248          rb_thread_schedule();
 
 (eval.c)
-
+``` @@ -459,7 +462,7 @@ Because of this, the second reason to switch is found. -h3. Waiting For Time +### Waiting For Time Moreover, also in the function named `rb_thread_wait_for()`, @@ -469,7 +472,7 @@ This is the substance of (Ruby's) `sleep` and such.

▼ `rb_thread_wait_for` (simplified)

-
+```TODO-lang
 8080  void
 8081  rb_thread_wait_for(time)
 8082      struct timeval time;
@@ -485,7 +488,7 @@ This is the substance of (Ruby's) `sleep` and such.
 8129  }
 
 (eval.c)
-
+``` `timeofday()` returns the current time. @@ -497,7 +500,7 @@ specific time". -h3. Switch by expirations +### Switch by expirations In the above all cases, @@ -512,7 +515,7 @@ Then, how long a thread can run by the time when it will have to stop, is what I'll talk about next. -h4. `setitimer` +#### `setitimer` Since it is the same every now and then, @@ -524,7 +527,7 @@ It is here.

▼ `catch_timer()`

-
+```TODO-lang
 8574  static void
 8575  catch_timer(sig)
 8576      int sig;
@@ -541,7 +544,7 @@ It is here.
 8587  }
 
 (eval.c)
-
+``` This seems something relating to signals. @@ -552,7 +555,7 @@ then it was used around here:

▼ `rb_thread_start_0()` (partial)

-
+```TODO-lang
 8620  static VALUE
 8621  rb_thread_start_0(fn, arg, th_arg)
 8622      VALUE (*fn)();
@@ -574,7 +577,7 @@ then it was used around here:
 8643  #endif
 
 (eval.c)
-
+``` This means, `catch_timer` is a signal handler of `SIGVTALRM`. @@ -604,12 +607,12 @@ Then, I'd like you to see the code of `catch_timer()` again. -
+```TODO-lang
 if (rb_trap_immediate) {
     rb_thread_schedule();
 }
 else rb_thread_pending = 1;
-
+``` There's a required condition that is doing `rb_thread_schedule()` only when @@ -627,7 +630,7 @@ This variable is used in the following place.

▼ `CHECK_INTS` − `HAVE_SETITIMER`

-
+```TODO-lang
   73  #if defined(HAVE_SETITIMER) && !defined(__BOW__)
   74  EXTERN int rb_thread_pending;
   75  # define CHECK_INTS do {\
@@ -639,7 +642,7 @@ This variable is used in the following place.
   81  } while (0)
 
 (rubysig.h)
-
+``` This way, inside of `CHECK_INTS`, `rb_thread_pending` is checked and @@ -657,7 +660,7 @@ Therefore, it is natural to exist in the important functions. -h4. `tick` +#### `tick` We understood the case when there's `setitimer`. @@ -668,7 +671,7 @@ It is the definition of the `#else` side.

▼ `CHECK_INTS` − `not HAVE_SETITIMER`

-
+```TODO-lang
   84  EXTERN int rb_thread_tick;
   85  #define THREAD_TICK 500
   86  #define CHECK_INTS do {\
@@ -684,7 +687,7 @@ It is the definition of the `#else` side.
   96  } while (0)
 
 (rubysig.h)
-
+``` Every time going through `CHECK_INTS`, decrement `rb_thread_tick`. @@ -695,7 +698,8 @@ after `THREAD_TICK` (=500) times going through `CHECK_INTS`. -h2. Scheduling +Scheduling +========== The second point is to which thread to switch. @@ -704,7 +708,7 @@ What solely responsible for this decision is `rb_thread_schedule()`. -h3. `rb_thread_schedule()` +### `rb_thread_schedule()` The important functions of `ruby` are always huge. @@ -714,7 +718,7 @@ Let's exhaustively divide it into portions.

▼ `rb_thread_schedule()` (outline)

-
+```TODO-lang
 7819  void
 7820  rb_thread_schedule()
 7821  {
@@ -751,7 +755,7 @@ Let's exhaustively divide it into portions.
 8045  }
 
 (eval.c)
-
+``` (A) When there's only one thread, this does not do anything and returns immediately. @@ -775,7 +779,7 @@ let's first study about `select` in advance here. -h3. `select` +### `select` `select` is a system call to wait until the preparation for reading or writing a certain file will be completed. @@ -783,11 +787,11 @@ Its prototype is this: -
+```TODO-lang
 int select(int max,
            fd_set *readset, fd_set *writeset, fd_set *exceptset,
            struct timeval *timeout);
-
+``` In the variable of type `fd_set`, a set of `fd` that we want to check is stored. @@ -805,13 +809,13 @@ I'll talk about `fd_set` in detail.

▼ `fd_set` manipulation

-
+```TODO-lang
 fd_set set;
 
 FD_ZERO(&set)       /* initialize */
 FD_SET(fd, &set)    /* add a file descriptor fd to the set */
 FD_ISSET(fd, &set)  /* true if fd is in the set */
-
+``` `fd_set` is typically a bit array, @@ -829,7 +833,7 @@ I'll show a simple usage example of `select`.

▼ a usage example of `select`

-
+```TODO-lang
 #include 
 #include 
 #include 
@@ -847,7 +851,7 @@ main(int argc, char **argv)
     read(STDIN_FILENO, buf, 1024);  /* success without delay */
     exit(0);
 }
-
+``` @@ -866,7 +870,7 @@ And a little more detailed example code is put in the attached CD-ROM -h3. Preparations for `select` +### Preparations for `select` Now, we'll go back to the code of `rb_thread_schedule()`. @@ -876,7 +880,7 @@ I'll show the content in shortened form.

▼ `rb_thread_schedule()` − preparations for `select`

-
+```TODO-lang
 7848    again:
           /* initialize the variables relating to select */
 7849      max = -1;
@@ -907,7 +911,7 @@ I'll show the content in shortened form.
 7901      END_FOREACH_FROM(curr, th);
 
 (eval.c)
-
+``` Whether it is supposed to be or not, @@ -917,19 +921,19 @@ These two are defined as follows:

▼ `FOREACH_THREAD_FROM`

-
+```TODO-lang
 7360  #define FOREACH_THREAD_FROM(f,x) x = f; do { x = x->next;
 7361  #define END_FOREACH_FROM(f,x) } while (x != f)
 
 (eval.c)
-
+``` Let's extract them for better understandability. -
+```TODO-lang
 th = curr;
 do {
     th = th->next;
@@ -937,7 +941,7 @@ do {
         .....
     }
 } while (th != curr);
-
+``` This means: follow the circular list of threads from the next of `curr` @@ -966,7 +970,7 @@ As for its code, let's see it just in case.

▼ `rb_thread_schedule()` − `select` preparation − `join` wait

-
+```TODO-lang
 7861          if (th->wait_for & WAIT_JOIN) {
 7862              if (rb_thread_dead(th->join)) {
 7863                  th->status = THREAD_RUNNABLE;
@@ -975,7 +979,7 @@ As for its code, let's see it just in case.
 7866          }
 
 (eval.c)
-
+``` The meaning of `rb_thread_dead()` is obvious because of its name. @@ -984,7 +988,7 @@ It determines whether or not the thread of the argument has finished. -h3. Calling `select` +### Calling `select` By now, we've figured out whether `select` is necessary or not, @@ -999,7 +1003,7 @@ and let it only check if I/O was completed.

▼ `rb_thread_schedule()` − `select`

-
+```TODO-lang
 7904      if (need_select) {
 7905          /* convert delay into timeval */
 7906          /* if theres immediately invocable threads, do only I/O checks */
@@ -1034,7 +1038,7 @@ and let it only check if I/O was completed.
 7994      }
 
 (eval.c)
-
+``` The first half of the block is as written in the comment. @@ -1051,7 +1055,7 @@ What are meaningful are the rest two. -h4. Timeout +#### Timeout When `select` is timeout, a thread of time wait or `select` wait may become @@ -1059,7 +1063,7 @@ invocable. Check about it and search runnable threads. If it is found, set `THREAD_RUNNABLE` to it. -h4. Completing normally +#### Completing normally If `select` is normally completed, @@ -1070,7 +1074,7 @@ If it is found, set `THREAD_RUNNABLE` to it. -h3. Decide the next thread +### Decide the next thread Taking all the information into considerations, @@ -1081,7 +1085,7 @@ Since all what was invocable and all what had finished waiting and so on became

▼ `rb_thread_schedule()` − decide the next thread

-
+```TODO-lang
 7996      FOREACH_THREAD_FROM(curr, th) {
 7997          if (th->status == THREAD_TO_KILL) {              /*(A)*/
 7998              next = th;
@@ -1095,7 +1099,7 @@ Since all what was invocable and all what had finished waiting and so on became
 8006      END_FOREACH_FROM(curr, th);
 
 (eval.c)
-
+``` (A) if there's a thread that is about to finish, @@ -1127,7 +1131,7 @@ level, the perfect detection is nearly impossible. -h3. Switching Threads +### Switching Threads The next thread to invoke has been determined. @@ -1141,7 +1145,8 @@ I'll start a new section. -h2. Context Switch +Context Switch +============== The last third point is thread-switch, @@ -1149,7 +1154,7 @@ and it is context-switch. This is the most interesting part of threads of `ruby`. -h3. The Base Line +### The Base Line Then we'll start with the tail of `rb_thread_schedule()`. @@ -1159,12 +1164,12 @@ I'll go with a significantly simplified version.

▼ `rb_thread_schedule()` (context switch)

-
+```TODO-lang
 if (THREAD_SAVE_CONTEXT(curr)) {
     return;
 }
 rb_thread_restore_context(next, RESTORE_NORMAL);
-
+``` As for the part of `THREAD_SAVE_CONTEXT()`, @@ -1173,7 +1178,7 @@ we need to extract the content at several places in order to understand.

▼ `THREAD_SAVE_CONTEXT()`

-
+```TODO-lang
 7619  #define THREAD_SAVE_CONTEXT(th) \
 7620      (rb_thread_save_context(th),thread_switch(setjmp((th)->context)))
 
@@ -1199,14 +1204,14 @@ we need to extract the content at several places in order to understand.
 7617  }
 
 (eval.c)
-
+``` If I merge the three then extract it, here is the result: -
+```TODO-lang
 rb_thread_save_context(curr);
 switch (setjmp(curr->context)) {
   case 0:
@@ -1221,7 +1226,7 @@ switch (setjmp(curr->context)) {
     return;
 }
 rb_thread_restore_context(next, RESTORE_NORMAL);
-
+``` At both of the return value of `setjmp()` and `rb_thread_restore_context()`, @@ -1233,12 +1238,12 @@ And if we will imagine the meaning also from the function names, -
+```TODO-lang
 save the context of the current thread
 setjmp
 restore the context of the next thread
 longjmp
-
+``` The rough main flow would probably look like this. @@ -1264,14 +1269,14 @@ Let's look at each of them in sequential order. -h3. `rb_thread_save_context()` +### `rb_thread_save_context()` Now, we'll start with `rb_thread_save_context()`, which saves a context.

▼ `rb_thread_save_context()` (simplified)

-
+```TODO-lang
 7539  static void
 7540  rb_thread_save_context(th)
 7541      rb_thread_t th;
@@ -1296,7 +1301,7 @@ Now, we'll start with `rb_thread_save_context()`, which saves a context.
       }
 
 (eval.c)
-
+``` @@ -1335,7 +1340,7 @@ It must be called when the target is the entire stack. -h3. `rb_thread_restore_context()` +### `rb_thread_restore_context()` And finally, @@ -1345,7 +1350,7 @@ which is the function to restore a thread.

▼ `rb_thread_restore_context()`

-
+```TODO-lang
 7635  static void
 7636  rb_thread_restore_context(th, exit)
 7637      rb_thread_t th;
@@ -1385,7 +1390,7 @@ which is the function to restore a thread.
 7690  }
 
 (eval.c)
-
+``` @@ -1417,7 +1422,7 @@ This is done by the `stack_extend()` in the first half.

▼ `stack_extend()`

-
+```TODO-lang
 7624  static void
 7625  stack_extend(th, exit)
 7626      rb_thread_t th;
@@ -1430,7 +1435,7 @@ This is done by the `stack_extend()` in the first half.
 7633  }
 
 (eval.c)
-
+``` By allocating a local variable (which will be put at the machine stack space) @@ -1451,7 +1456,7 @@ such as possible procedures after returning from `stack_extend()`. -h3. Issues +### Issues This is the implementation of the `ruby` thread switch. diff --git a/yacc.textile b/yacc.md similarity index 94% rename from yacc.textile rename to yacc.md index da0ef97..a649383 100644 --- a/yacc.textile +++ b/yacc.md @@ -4,11 +4,13 @@ title: YACC crash course --- Translated by Vincent ISAMBART & ocha- -h1. Chapter 9: `yacc` crash course +Chapter 9: `yacc` crash course +------------------------------ -h2. Outline +Outline +======= -h3. Parser and scanner +### Parser and scanner How to write parsers for programming languages has been an active area of research for a long time, and there is a quite firm established @@ -36,7 +38,7 @@ By the way, it seems the reason white spaces had not meaning in Fortran 77 was that when writing programs on punch cards it was easy to make errors in the number of spaces. -h3. List of symbols +### List of symbols I said that the scanner spits out a list of words (tokens), but, to be exact, what the scanner creates is a list of "symbols", not words. @@ -55,7 +57,7 @@ these symbols are. For example, `NUMBER` or `DIGIT` for numbers, `IDENTIFIER` for names like "`name`", `IF` for the reserved word `if`. These symbols are then given to the next phase. -h3. Parser generator +### Parser generator The list of words and symbols spitted out by the scanner are going to be used to form a tree. This tree is called a syntax tree. @@ -106,9 +108,10 @@ satisfied, you can also read "Compilers" (also known as the "dragon book" because of the dragon on its cover) by Alfred V. Aho, Ravi Sethi and Jeffrey D. Ullman. -h2. Grammar +Grammar +======= -h3. Grammar file +### Grammar file The input file for `yacc` is called "grammar file", as it's the file where the grammar is written. The convention is to name this grammar @@ -116,7 +119,10 @@ file `*.y`. 
It will be given to `yacc` who will generate C source code. This file can then be compiled as usual (figure 1 shows the full process). -!images/ch_yacc_build.jpg(Figure 1: File dependencies)! +
+ figure 1: Figure 1: File dependencies +
figure 1: Figure 1: File dependencies
+
The output file name is always `y.tab.c` and can't be changed. The recent versions of `yacc` usually allow to change it on the command @@ -128,7 +134,7 @@ It's good to have a look at the file once. The grammar file's content has the following form: ▼ General form of the grammar file -
+```TODO-lang
 %{
 Header
 %}
@@ -140,7 +146,7 @@ Header
 Rules part
 %%
 User defined part
-
+``` `yacc`'s input file is first divided in 3 parts by `%%`. The first part if called the definition part, has a lot of definitions and @@ -158,7 +164,7 @@ by the user. `yacc` just copies this part verbatim in the output file. It's used for example to put auxiliary routines needed by the parser. -h3. What does `yacc` do. +### What does `yacc` do. What `yacc` takes care of is mainly this rules part in the middle. `yacc` takes the grammar written there and use it to make a @@ -177,7 +183,7 @@ survived to this day even though we keep complaining about it. But what on earth is this core part? That's what we're going to see. -h3. BNF +### BNF When we want to write a parser in C, its code will be "cut the string this way, make this an `if` statement..." When using parser @@ -190,9 +196,9 @@ But how can we tell the specification? With `yacc`, the method of description used is the BNF (Backus-Naur Form). Let's look at a very simple example. -
+```TODO-lang
 if_stmt: IF expr THEN stmt END
-
+``` Let's see separately what's at the left and at the right of the "`:`". The part on the left side, `if_stmt`, is equal to the right @@ -213,10 +219,10 @@ being able to use `else`. And `even` if we could write `else`, having to always write the `else` even when it's useless would be cumbersome. In this case we could do the following: -
+```TODO-lang
 if_stmt: IF expr THEN stmt END
        | IF expr THEN stmt ELSE stmt END
-
+``` "`|`" means "or". @@ -230,10 +236,10 @@ Here I would like you to pay attention to the split done with `|` is just a shorter way to repeat the left side. The previous example has exactly the same meaning as the following: -
+```TODO-lang
 if_stmt: IF expr THEN stmt END
 if_stmt: IF expr THEN stmt ELSE stmt END
-
+``` This means two rules are defined in the example. @@ -242,7 +248,7 @@ statement. That's because the symbols `expr` and `stmt` are not sent by the scanner, their rules must be defined. To be closer to Ruby, let's boldly add some rules. -
+```TODO-lang
 stmt   : if_stmt
        | IDENTIFIER '=' expr   /* assignment */
        | expr
@@ -257,7 +263,7 @@ expr   : IDENTIFIER       /* reading a variable */
 funcall: IDENTIFIER '(' args ')'
 
 args   : expr             /* only one parameter */
-
+``` I used two new elements. First, comments of the same form as in C, and character expressed using `'='`. This `'='` is also of course a @@ -282,7 +288,7 @@ equivalent. That's because `NUMBER` is `expr` and `expr` is `stmt`. We can also say that more complicated things are equivalent. -
+```TODO-lang
               stmt
                ↓
              if_stmt
@@ -292,7 +298,7 @@ We can also say that more complicated things are equivalent.
 IF IDENTIFIER THEN expr END
                     ↓
 IF IDENTIFIER THEN NUMBER END
-
+``` When it has expanded until here, all elements become the symbols sent by the scanner. @@ -300,7 +306,7 @@ It means such sequence of symbols is correct as a program. Or putting it the other way around, if this sequence of symbols is sent by the scanner, the parser can understand it in the opposite order of expanding. -
+```TODO-lang
 IF IDENTIFIER THEN NUMBER END
                     ↓
 IF IDENTIFIER THEN expr END
@@ -310,7 +316,7 @@ IF IDENTIFIER THEN expr END
              if_stmt
                ↓
               stmt
-
+``` And `stmt` is a symbol expressing the whole program. That's why this sequence of symbols is a correct program for the parser. When it's the @@ -324,7 +330,7 @@ ones for which the boxes where filled correctly. Parser and government office are strangely similar for instance in the fact that they care about details in specification and that they use complicated terms. -h3. Terminal symbols and nonterminal symbols +### Terminal symbols and nonterminal symbols Well, in the confusion of the moment I used without explaining it the expression "symbols coming from the scanner". So let's explain this. I @@ -345,19 +351,19 @@ exist in the parser. Nonterminal symbols also always appear at one moment or the other as the left side of a rule. In this chapter, nonterminal symbols are always written in lower case letters. -h3. How to test +### How to test I'm now going to tell you the way to process the grammar file with `yacc`. -
+```TODO-lang
 %token A B C D E
 %%
 list: A B C
     | de
 
 de  : D E
-
+``` First, put all terminal symbols used after `%token`. However, you do not have to type the symbols with quotes (like `'='`). Then, put `%%` @@ -365,19 +371,19 @@ to mark a change of section and write the grammar. That's all. Let's now process this. -
+```TODO-lang
 % yacc first.y
 % ls
 first.y  y.tab.c
 %
-
+``` Like most Unix tools, "silence means success". There's also implementations of `yacc` that need semicolons at the end of (groups of) rules. When it's the case we need to do the following: -
+```TODO-lang
 %token A B C D E
 %%
 list: A B C
@@ -386,71 +392,71 @@ list: A B C
 
 de  : D E
     ;
-
+``` I hate these semicolons so in this book I'll never use them. -h3. Void rules +### Void rules Let's now look a little more at some of the established ways of grammar description. I'll first introduce void rules. -
+```TODO-lang
 void:
-
+``` There's nothing on the right side, this rule is "void". For example, the two following `target`s means exactly the same thing. -
+```TODO-lang
 target: A B C
 
 target: A void B void C
 void  :
-
+``` What is the use of such a thing? It's very useful. For example in the following case. -
+```TODO-lang
 if_stmt : IF expr THEN stmts opt_else END
 
 opt_else:
         | ELSE stmts
-
+``` Using void rules, we can express cleverly the fact that "the `else` section may be omitted". Compared to the rules made previously using two definitions, this way is shorter and we do not have to disperse the burden. -h3. Recursive definitions +### Recursive definitions The following example is still a little hard to understand. -
+```TODO-lang
 list: ITEM         /* rule 1 */
     | list ITEM    /* rule 2 */
-
+``` This expresses a list of one or more items, in other words any of the following lists of symbols: -
+```TODO-lang
 ITEM
 ITEM ITEM
 ITEM ITEM ITEM
 ITEM ITEM ITEM ITEM
       :
-
+``` Do you understand why? First, according to rule 1 `list` can be read `ITEM`. If you merge this with rule 2, `list` can be `ITEM ITEM`. -
+```TODO-lang
 list: list ITEM
     = ITEM ITEM
-
+``` We now understand that the list of symbols `ITEM ITEM` is similar to `list`. By applying again rule 2 to `list`, we can say that 3 `ITEM` @@ -461,21 +467,21 @@ This is something like mathematical induction. I'll now show you the next example. The following example expresses the lists with 0 or more `ITEM`. -
+```TODO-lang
 list:
     | list ITEM
-
+``` First the first line means "`list` is equivalent to (void)". By void I mean the list with 0 `ITEM`. Then, by looking at rule 2 we can say that "`list ITEM`" is equivalent to 1 `ITEM`. That's because `list` is equivalent to void. -
+```TODO-lang
 list: list   ITEM
     = (void) ITEM
     =        ITEM
-
+``` By applying the same operations of replacement multiple times, we can understand that `list` is the expression a list of 0 or more items. @@ -484,17 +490,18 @@ With this knowledge, "lists of 2 or more `ITEM`" or "lists of 3 or more `ITEM`" are easy, and we can even create "lists of an even number of elements". -
+```TODO-lang
 list:
     | list ITEM ITEM
-
+``` -h2. Construction of values +Construction of values +====================== This abstract talk lasted long enough so in this section I'd really like to go on with a more concrete talk. -h3. Shift and reduce +### Shift and reduce Up until now, various ways to write grammars have been explained, but what we want is being able to build a syntax tree. @@ -506,30 +513,30 @@ something to the rules. We'll first see what the parser does during the execution. We'll use the following simple grammar as an example. -
+```TODO-lang
 %token A B C
 %%
 program: A B C
-
+``` In the parser there is a stack called the semantic stack. The parser pushes on it all the symbols coming from the scanner. This move is called "shifting the symbols". -
+```TODO-lang
 [ A B ] ← C   shift
-
+``` And when any of the right side of a rule is equal to the end of the stack, it is "interpreted". When this happens, the sequence of the right-hand side is replaced by the symbol of the left-hand side. -
+```TODO-lang
 [ A B C ]
     ↓         reduction
 [ program ]
-
+``` This move is called "reduce `A B C` to `program`". This term is a little presumptuous, but in short it is like, @@ -546,7 +553,7 @@ found out. Therefore, if the input is just finished here, it is accepted. Let's try with a little more complicated grammar. -
+```TODO-lang
 %token IF E S THEN END
 %%
 program : if
@@ -557,15 +564,15 @@ expr    : E
 
 stmts   : S
         | stmts S
-
+``` The input from the scanner is this. -
+```TODO-lang
 IF  E  THEN  S  S  S  END
-
+``` The transitions of the semantic stack in this case are shown below. @@ -596,7 +603,7 @@ If there's a void rule, it's possible that a symbol is generated out of "void". -h3. Action +### Action Now, I'll start to describe the important parts. @@ -610,9 +617,9 @@ parser performing a reduction." The hooks are called actions of the parser. An action can be written at the last of the rule as follows. -
+```TODO-lang
 program: A B C { /* Here is an action */ }
-
+``` The part between `{` and `}` is the action. If you write like this, @@ -623,7 +630,7 @@ If it is a C code, almost all things can be written. -h3. The value of a symbol +### The value of a symbol This is further more important but, each symbol has "its value". @@ -637,10 +644,10 @@ Each symbol and its value are pushed together on the semantic stack. The next figure shows the state just the moment `S` is shifted with its value. -
+```TODO-lang
 IF     expr    THEN    stmts   S
 value  value   value   value   value
-
+``` According to the previous rule, `stmts S` can be reduced to `stmts`. @@ -649,14 +656,14 @@ but at that moment, the values of the symbols corresponding to the right-hand side are passed to the action. -
+```TODO-lang
 IF    expr   THEN   stmts  S      /* Stack */
 v1    v2     v3     v4     v5
                     ↓     ↓
             stmts:  stmts  S      /* Rule */
                     ↓     ↓
                   { $1  +  $2; }  /* Action */
-
+``` This way an action can take the value of each symbol corresponding to the @@ -672,7 +679,7 @@ It is expressed as `$$` in actions, the value of `$$` when leaving an action will be the value of the left-hand side symbol. -
+```TODO-lang
 IF    expr   THEN   stmts  S      /* the stack just before reducing */
 v1    v2     v3     v4     v5
                     ↓     ↓
@@ -683,7 +690,7 @@ v1    v2     v3     v4     v5
 
 IF    expr   THEN   stmts         /* the stack after reducing */
 v1    v2     v3     (v4+v5)
-
+``` As the end of this section, this is just an extra. @@ -694,7 +701,7 @@ and it is called "semantic stack" for short. -h3. `yacc` and types +### `yacc` and types It's really cumbersome but without talking about types we cannot finish this @@ -717,7 +724,7 @@ we want to use structs and pointers and the other various things. Therefore for instance, we use `%union` as follows. -
+```TODO-lang
 %union {
     struct node {
         int type;
@@ -727,7 +734,7 @@ Therefore for instance, we use `%union` as follows.
     int num;
     char *str;
 }
-
+``` Because this is not for practical use, @@ -739,7 +746,7 @@ end of the `%union` block. And, if this is written, it would look like the following in `y.tab.c`. -
+```TODO-lang
 typedef union {
     struct node {
         int type;
@@ -749,28 +756,28 @@ typedef union {
     int num;
     char *str;
 } YYSTYPE;
-
+``` And, as for the semantic stack, -
+```TODO-lang
 YYSTYPE yyvs[256];       /* the substance of the stack(yyvs = YY Value Stack) */
 YYSTYPE *yyvsp = yyvs;   /* the pointer to the end of the stack */
-
+``` we can expect something like this. Therefore, the values of the symbols appear in actions would be -
+```TODO-lang
 /* the action before processed by yacc */
 target: A B C { func($1, $2, $3); }
 
 /* after converted, its appearance in y.tab.c */
 { func(yyvsp[-2], yyvsp[-1], yyvsp[0]); ;
-
+``` naturally like this. @@ -788,21 +795,21 @@ using `%type` for nonterminal symbols, it is written as follows. -
+```TODO-lang
 %token<num> A B C    /* All of the values of A B C is of type int */
 %type<str> target    /* All of the values of target is of type char* */
-
+``` On the other hand, if you'd like to specify everytime, you can write a member name into next to `$` as follows. -
+```TODO-lang
 %union { char *str; }
 %%
 target: { $<str>$ = "In short, this is like typecasting"; }
-
+``` You'd better avoid using this method if possible.
@@ -811,7 +818,7 @@ Defining a member for each symbol is basic. -h3. Coupling the parser and the scanner together +### Coupling the parser and the scanner together After all, I've finished to talk all about this and that of the values inside the parser. @@ -832,14 +839,14 @@ its member name has to be manually written. The very simple examples would look like the following. -
+```TODO-lang
 static int
 yylex()
 {
     yylval.str = next_token();
     return STRING;
 }
-
+``` Figure 2 summarizes the relationships described by now. @@ -856,30 +863,30 @@ Figure 2: Relationships among `yacc` related variables & functions -h3. Embedded Action +### Embedded Action An action is written at the last of a rule, is how it was explained. However, actually it can be written in the middle of a rule. -
+```TODO-lang
 target: A B { puts("embedded action"); } C D
-
+``` This is called "embedded action".
An embedded action is merely a syntactic sugar of the following definition: -
+```TODO-lang
 target: A B dummy C D
 
 dummy :     /* void rule */
         {
             puts("embedded action");
         }
-
+``` From this example, you might be able to tell everything including @@ -892,10 +899,11 @@ the value of the embedded action will come out as `$3`. -h2. Practical Topics +Practical Topics +================ -h3. Conflicts +### Conflicts I'm not afraid of `yacc` anymore. @@ -911,10 +919,10 @@ matches the end of the stack", but what happens if there's a rule like this: -
+```TODO-lang
 target  : A B C
         | A B C
-
+``` When the sequence of symbols `A B C` actually comes out, @@ -926,10 +934,10 @@ it would complain that a reduce/reduce conflict occurs. It means multiple rules are possible to reduce at the same time. -
+```TODO-lang
 % yacc rrconf.y
 conflicts:  1 reduce/reduce
-
+``` But usually, I think you won't do such things except as an accident.
@@ -937,14 +945,14 @@ But how about the next example? The described symbol sequence is completely the same. -
+```TODO-lang
 target  : abc
         | A bc
 
 abc     : A B C
 
 bc      :   B C
-
+``` This is relatively possible. Especially when each part is complicatedly moved @@ -955,14 +963,14 @@ without noticing. There's also a similar pattern, as follows: -
+```TODO-lang
 target  : abc
         | ab C
 
 abc     : A B C
 
 ab      : A B
-
+``` When the symbol sequence `A B C` comes out, @@ -973,10 +981,10 @@ This means there're both a shift-able rule and a reduce-able rule at the same time. -
+```TODO-lang
 % yacc srconf.y
 conflicts:  1 shift/reduce
-
+``` The famous example of shift/reduce conflicts is "the hanging `else` problem". @@ -984,7 +992,7 @@ For example, the `if` statement of C language causes this problem. I'll describe it by simplifying the case: -
+```TODO-lang
 stmt     : expr ';'
          | if
 
@@ -992,7 +1000,7 @@ expr     : IDENTIFIER
 
 if       : IF '(' expr ')' stmt
          | IF '(' expr ')' stmt  ELSE stmt
-
+``` In this rule, @@ -1001,20 +1009,20 @@ the substance of `if` is only one statement. Now, what happens if the next program is parsed with this grammar? -
+```TODO-lang
 if (cond)
     if (cond)
         true_stmt;
     else
         false_stmt;
-
+``` If it is written this way, we might feel like it's quite obvious. But actually, this can be interpreted as follows. -
+```TODO-lang
 if (cond) {
     if (cond)
         true_stmt;
@@ -1022,7 +1030,7 @@ if (cond) {
 else {
     false_stmt;
 }
-
+``` The question is @@ -1041,18 +1049,18 @@ it chooses shift by default when a shift/reduce conflict occurs. -h3. Look-ahead +### Look-ahead As an experiment, I'd like you to process the next grammar with `yacc`. -
+```TODO-lang
 %token A B C
 %%
 target  : A B C   /* rule 1 */
         | A B     /* rule 2 */
-
+``` We can't help expecting there should be a conflict. @@ -1062,10 +1070,10 @@ the rule 2 would attempt to reduce. In other words, this should cause a shift/reduce conflict. However, .... -
+```TODO-lang
 % yacc conf.y
 %
-
+``` It's odd, there's no conflict. Why? @@ -1110,7 +1118,7 @@ but the look-ahead during executions. -h3. Operator Precedence +### Operator Precedence Since abstract talks have lasted for long, I'll talk more concretely. @@ -1119,7 +1127,7 @@ There are also established tactics for this, we'd better tamely follow it. Something like a calculator for arithmetic operations is defined below: -
+```TODO-lang
 expr    : expr '+' expr
         | expr '-' expr
         | expr '*' expr
@@ -1128,7 +1136,7 @@ expr    : expr '+' expr
 
 primary : NUMBER
         | '(' expr ')'
-
+``` `primary` is the smallest grammar unit. @@ -1139,28 +1147,28 @@ Then, if this grammar is written to an arbitrary file and compiled, the result would be this. -
+```TODO-lang
 % yacc infix.y
 16 shift/reduce conflicts
-
+``` They conflict aggressively. Thinking for 5 minutes is enough to see that this rule causes a problem in the following and similar cases: -
+```TODO-lang
 1 - 1 - 1
-
+``` This can be interpreted in both of the next two ways. -
+```TODO-lang
 (1 - 1) - 1
 1 - (1 - 1)
-
+``` The former is natural as a numerical expression. @@ -1176,10 +1184,10 @@ Then, what we can do is writing this in the definition part. -
+```TODO-lang
 %left '+' '-'
 %left '*' '/'
-
+``` These instructions specify both the precedence and the associativity @@ -1193,23 +1201,23 @@ Describing it logically is complicated, so if I put it instinctively, it is about to which operator parentheses are attached in the following and similar cases. -
+```TODO-lang
 1 + 2 * 3
-
+``` If `*` has higher precedence, it would be this. -
+```TODO-lang
 1 + (2 * 3)
-
+``` If `+` has higher precedence, it would be this. -
+```TODO-lang
 (1 + 2) * 3
-
+``` As shown above, resolving shift/reduce conflicts @@ -1220,9 +1228,9 @@ However, if the operators has the same precedence, how can it be resolved? Like this, for instance, -
+```TODO-lang
 1 - 2 - 3
-
+``` because both operators are `-`, their precedences are the completely same. In this case, it is resolved by using the associativity. @@ -1240,19 +1248,19 @@ Most of the operators for numerical expressions are left-associative. The right-associative is used mainly for `=` of assignment and `not` of denial. -
+```TODO-lang
 a = b = 1    # (a = (b = 1))
 not not a    # (not (not a))
-
+``` The representatives of non-associative are probably the comparison operators. -
+```TODO-lang
 a == b == c   # parse error
 a <= b <= c   # parse error
-
+``` However, this is not the only possibility. @@ -1266,8 +1274,8 @@ The lower the operators written, the higher the precedences they have. If they are written in the same line, they have the same level of precedence. -
+```TODO-lang
 %left  '+' '-'    /* left-associative and third precedence  */
 %left  '*' '/'    /* left-associative and second precedence */
 %right '!'        /* right-associative and first precedence */
-
+``` From 0a4c68c91a5ffb9163cf81dbeef7a8a8059e9da0 Mon Sep 17 00:00:00 2001 From: Ulysse Buonomo Date: Sat, 17 Apr 2021 20:49:43 +0200 Subject: [PATCH 05/14] Manual advancement --- anyeval.md | 42 ++-- class.md | 117 +++++------ contextual.md | 36 ++-- intro.md | 522 +++++++++++++++++++++++++------------------------- iterator.md | 2 +- load.md | 1 + parser.md | 141 ++++++++------ security.md | 13 +- spec.md | 32 ++-- thread.md | 67 ++++--- variable.md | 3 + yacc.md | 2 +- 12 files changed, 513 insertions(+), 465 deletions(-) diff --git a/anyeval.md b/anyeval.md index 2868ae0..0625f92 100644 --- a/anyeval.md +++ b/anyeval.md @@ -28,7 +28,7 @@ Its return value is the value of the last expression of the program. -```TODO-lang +```ruby p eval("1 + 1") # 2 ``` @@ -37,7 +37,7 @@ You can also refer to a variable in its scope from inside of a string to `eval`. -```TODO-lang +```ruby lvar = 5 @ivar = 6 p eval("lvar + @ivar") # 11 @@ -54,7 +54,7 @@ And you can also define methods and define classes. -```TODO-lang +```ruby def a eval('class C; def test() puts("ok") end end') end @@ -70,7 +70,7 @@ its environment. -```TODO-lang +```ruby def new_env n = 5 Proc.new { nil } # turn the environment of this method into an object and return it @@ -93,7 +93,7 @@ is as if in a module statement or a class statement. -```TODO-lang +```ruby lvar = "toplevel lvar" # a local variable to confirm this scope module M @@ -113,7 +113,7 @@ singleton class statement is the object. -```TODO-lang +```ruby lvar = "toplevel lvar" # a local variable to confirm this scope obj = Object.new @@ -133,7 +133,7 @@ For instance, -```TODO-lang +```ruby obj = Object.new p obj # # obj.instance_eval { @@ -169,7 +169,7 @@ parameters. 
Let's assume the form of call is limited to the below: -```TODO-lang +```ruby eval(prog_string, some_block) ``` @@ -180,7 +180,7 @@ The function prototype of `eval()` is: -```TODO-lang +```c static VALUE eval(VALUE self, VALUE src, VALUE scope, char *file, int line); ``` @@ -193,7 +193,7 @@ is supposed to be located. Then, let's see the content:

▼ `eval()` (simplified)

-```TODO-lang +```c 4984 static VALUE 4985 eval(self, src, scope, file, line) 4986 VALUE self, src, scope; @@ -310,7 +310,7 @@ Here is `compile()`.

▼ `compile()`

-```TODO-lang +```c 4968 static NODE* 4969 compile(src, file, line) 4970 VALUE src; @@ -362,7 +362,7 @@ They are called in this sort of way.

▼ How `top_local_init()` is called

-```TODO-lang +```c program : { top_local_init(); } compstmt { top_local_setup(); } @@ -376,7 +376,7 @@ And this is the content of it:

▼ `top_local_init()`

-```TODO-lang +```c 5273 static void 5274 top_local_init() 5275 { @@ -407,7 +407,7 @@ Next, here is `top_local_setup()`.

▼ `top_local_setup()`

-```TODO-lang +```c 5291 static void 5292 top_local_setup() 5293 { @@ -471,7 +471,7 @@ it is `yycompile()`.

▼ setting `ruby_dyna_vars` aside

-```TODO-lang +```c static NODE* yycompile(f, line) { @@ -518,7 +518,7 @@ I'd like the readers who noticed this to be relieved by reading the next part.

▼ `yycompile()` − freeing `ruby_dyna_vars`

-```TODO-lang +```c 2386 vp = ruby_dyna_vars; 2387 ruby_dyna_vars = vars; 2388 lex_strterm = 0; @@ -552,7 +552,7 @@ and the substance of `Object#instance_eval` is `rb_obj_instance_eval()`.

▼ `rb_mod_module_eval() rb_obj_instance_eval()`

-```TODO-lang +```c 5316 VALUE 5317 rb_mod_module_eval(argc, argv, mod) 5318 int argc; @@ -626,7 +626,7 @@ it becomes very comprehensible in comparison to the one before being absorbed.

specific_eval()instance_eval, eval, string

-```TODO-lang +```c static VALUE instance_eval_string(self, src, file, line) VALUE self, src; @@ -697,7 +697,7 @@ Here is the result of cutting them all.

▼ `specific_eval()` (simplified)

-```TODO-lang +```c 5258 static VALUE 5259 specific_eval(argc, argv, klass, self) 5260 int argc; @@ -735,7 +735,7 @@ Next, we'll look at `eval_under()` and `eval_under_i()`.

▼ `eval_under()`

-```TODO-lang +```c 5222 static VALUE 5223 eval_under(under, self, src, file, line) 5224 VALUE under, self, src; @@ -797,7 +797,7 @@ Also in the previous absorbed version, for only this point, -```TODO-lang +```c VALUE sclass = .....; VALUE cbase = sclass; ``` diff --git a/class.md b/class.md index 04e77e0..67ec16a 100644 --- a/class.md +++ b/class.md @@ -38,7 +38,7 @@ API. I'll introduce to you these functions one by one. Ruby array class, `Array`, as an example. ▼ `Array` class definition -```TODO-lang +```c 19 VALUE rb_cArray; 1809 void @@ -60,7 +60,7 @@ the class object, it also defines the constant. That means that after this you can already access `Array` from a Ruby program. It corresponds to the following Ruby program: -```TODO-lang +```ruby class Array < Object ``` @@ -75,7 +75,7 @@ class nested in an other class or module. This time the example is what is returned by `stat(2)`, `File::Stat`. ▼ Definition of `File::Stat` -```TODO-lang +```c 78 VALUE rb_cFile; 80 static VALUE rb_cStat; @@ -87,7 +87,7 @@ what is returned by `stat(2)`, `File::Stat`. This code corresponds to the following Ruby program; -```TODO-lang +```ruby class File < IO class Stat < Object ``` @@ -99,7 +99,7 @@ This time again I omitted the `end` on purpose. `rb_define_module()` is simple so let's end this quickly. ▼ Definition of `Enumerable` -```TODO-lang +```c 17 VALUE rb_mEnumerable; 492 rb_mEnumerable = rb_define_module("Enumerable"); @@ -111,7 +111,7 @@ The `m` in the beginning of `rb_mEnumerable` is similar to the `c` for classes: it shows that it is a module. The corresponding Ruby program is: -```TODO-lang +```ruby module Enumerable ``` @@ -124,7 +124,7 @@ This time the function is the one for defining methods, example from `Array`. ▼ Definition of `Array#to_s` -```TODO-lang +```c 1818 rb_define_method(rb_cArray, "to_s", rb_ary_to_s, 0); (array.c) @@ -136,7 +136,7 @@ the number of parameters taken by the method. As `to_s` does not take any parameters, it's 0. 
If we write the corresponding Ruby program, we'll have this: -```TODO-lang +```ruby class Array < Object def to_s # content of rb_ary_to_s() @@ -152,7 +152,7 @@ the enclosing `class` part. One more example, this time taking a parameter: ▼ Definition of `Array#concat` -```TODO-lang +```c 1835 rb_define_method(rb_cArray, "concat", rb_ary_concat, 1); (array.c) @@ -163,7 +163,7 @@ The class for the definition is `rb_cArray` is `rb_ary_concat()` and the number of parameters is 1. It corresponds to writing the corresponding Ruby program: -```TODO-lang +```ruby class Array < Object def concat( str ) # content of rb_ary_concat() @@ -180,7 +180,7 @@ show it here, but for a particular reason we'll look at `File.link` instead. ▼ Definition of `File.link` -```TODO-lang +```c 2624 rb_define_singleton_method(rb_cFile, "link", rb_file_s_link, 2); (file.c) @@ -199,7 +199,7 @@ instance, for `Array` a function `Init_Array()` like this has been made: ▼ `Init_Array` -```TODO-lang +```c 1809 void 1810 Init_Array() 1811 { @@ -223,7 +223,7 @@ The `Init` for the built-in functions are explicitly called during the startup of `ruby`. This is done in `inits.c`. ▼ `rb_call_inits()` -```TODO-lang +```c 47 void 48 rb_call_inits() 49 { @@ -250,7 +250,7 @@ That explains it for the built-in libraries, but what about extension libraries? In fact, for extension libraries the convention is the same. Take the following code: -```TODO-lang +```ruby require "myextension" ``` @@ -264,7 +264,7 @@ The following example is from `stringio`, an extension library provided with `ruby`, that is to say not from a built-in library. ▼ `Init_stringio()` (beginning) -```TODO-lang +```c 895 void 896 Init_stringio() 897 { @@ -291,7 +291,7 @@ in `m_tbl` will do. But what about singleton methods? We'll now look into the way singleton methods are defined. 
▼ `rb_define_singleton_method()` -```TODO-lang +```c 721 void 722 rb_define_singleton_method(obj, name, func, argc) 723 VALUE obj; @@ -323,7 +323,7 @@ Well, let's confirm what the singleton classes are made of. It's too simple to just show you the code of a function each time so this time I'll use a new weapon, a call graph. -```TODO-lang +``` rb_define_singleton_method rb_define_method rb_singleton_class @@ -382,7 +382,7 @@ of `rb_define_class()` itself. I have some reasons to be interested in something that's deeper. That's why we will first look at the call graph of `rb_define_class()`. -```TODO-lang +``` rb_define_class rb_class_inherited rb_define_class_id @@ -397,7 +397,7 @@ I'm interested by `rb_class_new()`. Doesn't this name means it creates a new class? Let's confirm that. ▼ `rb_class_new()` -```TODO-lang +```c 37 VALUE 38 rb_class_new(super) 39 VALUE super; @@ -420,7 +420,7 @@ ignore it. `rb_raise()` is error handling so we can ignore it. Only `rb_class_boot()` remains. So let's look at it. ▼ `rb_class_boot()` -```TODO-lang +```c 21 VALUE 22 rb_class_boot(super) 23 VALUE super; @@ -459,7 +459,7 @@ and `rb_class_new()` is almost identical. Then, let's once more look at `rb_singleton_class()`'s call graph: -```TODO-lang +``` rb_singleton_class SPECIAL_SINGLETON rb_make_metaclass @@ -480,7 +480,7 @@ far, we just need to read `rb_singleton_class()` and non-essential parts. ▼ `rb_singleton_class()` -```TODO-lang +```c 678 #define SPECIAL_SINGLETON(x,c) do {\ 679 if (obj == (x)) {\ 680 return c;\ @@ -550,7 +550,7 @@ them. ### Compressed `rb_make_metaclass()` ▼ `rb_make_metaclass()` -```TODO-lang +```c 142 VALUE 143 rb_make_metaclass(obj, super) 144 VALUE obj, super; @@ -586,7 +586,7 @@ because parameters, return values and local variables are all `VALUE`. 
That makes us able to compress to the following: ▼ `rb_singleton_class() rb_make_metaclass()` (after compression) -```TODO-lang +```c rb_singleton_class(obj) { if (FL_TEST(RBASIC(obj)->klass, FL_SINGLETON) && @@ -632,7 +632,7 @@ we'll remove it. With these simplifications, we get the following: ▼ `rb_singleton_class() rb_make_metaclass()` (after recompression) -```TODO-lang +```c rb_singleton_class(obj) { klass = create a class with RBASIC(obj)->klass as superclass; @@ -647,7 +647,7 @@ because `klass` is used too often. So let's rename the `klass` variable to `sclass`. ▼ `rb_singleton_class() rb_make_metaclass()` (variable substitution) -```TODO-lang +```c rb_singleton_class(obj) { sclass = create a class with RBASIC(obj)->klass as superclass; @@ -680,7 +680,7 @@ By the way, did you notice about, during the compression process, the call to `rb_singleton_class_attached()` was stealthily removed? Here: -```TODO-lang +```c rb_make_metaclass(obj, super) { klass = create a class with super as superclass; @@ -692,7 +692,7 @@ rb_make_metaclass(obj, super) Let's have a look at what it does. ▼ `rb_singleton_class_attached()` -```TODO-lang +```c 130 void 131 rb_singleton_class_attached(klass, obj) 132 VALUE klass, obj; @@ -853,7 +853,7 @@ In Ruby, singleton methods defined in a class are called class methods. However, their specification is a little strange. For some reasons, class methods are inheritable. -```TODO-lang +```ruby class A def A.test # defines a singleton method in A puts("ok") @@ -881,7 +881,7 @@ Then let's first look at the code defining classes. Class definition means of course `rb_define_class()`. Now let's take the call graph of this function. -```TODO-lang +``` rb_define_class rb_class_inherited rb_define_class_id @@ -906,7 +906,7 @@ surroundings again. Let's first start our reading with its caller, `rb_define_class_id()`. 
▼ `rb_define_class_id()` -```TODO-lang +```c 160 VALUE 161 rb_define_class_id(id, super) 162 ID id; @@ -932,13 +932,13 @@ that there's the `rb_make_metaclass()` in question. I'm concerned by the fact that when called from `rb_singleton_class()`, the parameters were different. Last time was like this: -```TODO-lang +```c rb_make_metaclass(obj, RBASIC(obj)->klass); ``` But this time is like this: -```TODO-lang +```c rb_make_metaclass(klass, RBASIC(super)->klass); ``` @@ -949,7 +949,7 @@ depending on that? Let's have once again a look at a simplified #### `rb_make_metaclass` (once more) ▼ `rb_make_metaclass` (after first compression) -```TODO-lang +```c rb_make_metaclass(obj, super) { klass = create a class with super as superclass; @@ -973,14 +973,14 @@ again, something is done only for `T_CLASS`, in other words classes. This clearly looks important. In `rb_define_class_id()`, as it's called like this: -```TODO-lang +```c rb_make_metaclass(klass, RBASIC(super)->klass); ``` Let's expand `rb_make_metaclass()`'s parameter variables with the actual values. ▼ `rb_make_metaclass` (recompression) -```TODO-lang +```c rb_make_metaclass(klass, super_klass /* == RBASIC(super)->klass */) { sclass = create a class with super_class as superclass; @@ -1019,7 +1019,7 @@ conclusion that `Object`'s class must be `(Object)`. And that's the case in practice. For example, by inheriting like in the following program : -```TODO-lang +```ruby class A < Object end class B < A @@ -1057,7 +1057,7 @@ The second question: the class of `Object` must be `Class`. Didn't I properly confirm this in chapter 1: Ruby language minimum by using `class()` method? -```TODO-lang +```ruby p(Object.class()) # Class ``` @@ -1068,7 +1068,7 @@ classes. Let's look at the body of the method, `rb_obj_class()` to confirm that. ▼ `rb_obj_class()` -```TODO-lang +```c 86 VALUE 87 rb_obj_class(obj) 88 VALUE obj; @@ -1137,7 +1137,7 @@ in `ruby`, only these 3 classes's creation is handled specially. 
Then let's look at the code: ▼ `Object`, `Module` and `Class` creation -```TODO-lang +```c 1243 rb_cObject = boot_defclass("Object", 0); 1244 rb_cModule = boot_defclass("Module", rb_cObject); 1245 rb_cClass = boot_defclass("Class", rb_cModule); @@ -1186,7 +1186,7 @@ First we'll read `rb_defined_class()`. After the end of this function, the class can be found from the constant. ▼ `rb_define_class()` -```TODO-lang +```c 183 VALUE 184 rb_define_class(name, super) 185 const char *name; @@ -1254,7 +1254,7 @@ that's the reason of such halfway description around here. Moreover, about this coming after `rb_define_class_id()`, -```TODO-lang +```c st_add_direct(rb_class_tbl, id, klass); ``` @@ -1275,7 +1275,7 @@ implemented? In fact this is done by `rb_name_class()` which already appeared a long time ago. The call is around the following: -```TODO-lang +``` rb_define_class rb_define_class_id rb_name_class @@ -1284,7 +1284,7 @@ rb_define_class Let's look at its content: ▼ `rb_name_class()` -```TODO-lang +```c 269 void 270 rb_name_class(klass, id) 271 VALUE klass; @@ -1311,7 +1311,7 @@ little more complicated. The function to define these nested classes is `rb_define_class_under()`. ▼ `rb_define_class_under()` -```TODO-lang +```c 215 VALUE 216 rb_define_class_under(outer, name, super) 217 VALUE outer; @@ -1362,7 +1362,7 @@ information starting from top-level, for example "`Net::NetPrivate::Socket`". ▼ `rb_set_class_path()` -```TODO-lang +```c 210 void 211 rb_set_class_path(klass, under, name) 212 VALUE klass, under; @@ -1393,7 +1393,7 @@ can't be seen from a Ruby program. In `rb_name_class()` there was `__classid__`, but `id` is different because it does not include nesting information (look at the table below). -```TODO-lang +``` __classpath__ Net::NetPrivate::Socket __classid__ Socket ``` @@ -1409,7 +1409,7 @@ Contrary to what I have just said, there are in fact cases in which neither `__classpath__` nor `__classid__` are set. 
That is because in Ruby you can use a method like the following to create a class. -```TODO-lang +```ruby c = Class.new() ``` @@ -1420,7 +1420,7 @@ If a class is created like this, it won't go through However, if later it's assigned to a constant, a name will be attached to the class at that moment. -```TODO-lang +```ruby SomeClass = c # the class name is SomeClass ``` @@ -1431,7 +1431,7 @@ For instance, when calling `p` on this this, a value equal to the class is searched in `rb_class_tbl`, and a name has to be chosen. The following case can also happen: -```TODO-lang +```ruby class A class B C = tmp = Class.new() @@ -1459,7 +1459,7 @@ precise, its body is `rb_mod_include()`, and there implementation finally calls `rb_include_module()`. Mixing what's happening in Ruby and C gives us the following call graph. -```TODO-lang +``` Module#include (rb_mod_include) Module#append_features (rb_mod_append_features) rb_include_module @@ -1470,7 +1470,7 @@ by `rb_include_module()`. This function is a little long so we'll look at it a half at a time. ▼ `rb_include_module` (first half) -```TODO-lang +```c /* include module in class */ 347 void 348 rb_include_module(klass, module) @@ -1503,7 +1503,7 @@ For the moment it's only security and type checking, therefore we can ignore it. The process itself is below: ▼ `rb_include_module` (second half) -```TODO-lang +```c 371 OBJ_INFECT(klass, module); 372 c = klass; 373 while (module) { @@ -1543,7 +1543,7 @@ First, what the (A) block does is written in the comment. It seems to be a special condition so let's first skip reading it for now. By extracting the important parts from the rest we get the following: -```TODO-lang +```c c = klass; while (module) { c = RCLASS(c)->super = include_class_new(module, RCLASS(c)->super); @@ -1559,7 +1559,7 @@ what, but at the moment I saw that I felt "Ah, doesn't this look the addition of elements to a list (like LISP's cons)?" and it suddenly make the story faster. 
In other words it's the following form: -```TODO-lang +``` list = new(item, list) ``` @@ -1572,7 +1572,8 @@ But to be sure of this we have to look at `include_class_new()`. ### `include_class_new()` ▼ `include_class_new()` -```TODO-lang + +```c 319 static VALUE 320 include_class_new(module, super) 321 VALUE module, super; @@ -1672,7 +1673,7 @@ that includes `m2`. From there, the changes made to include `m1` in Well, now we can explain the part of `rb_include_module()` we skipped. ▼ `rb_include_module` (avoiding double inclusion) -```TODO-lang +```c 378 /* (A) skip if the superclass already includes module */ 379 for (p = RCLASS(klass)->super; p; p = RCLASS(p)->super) { 380 switch (BUILTIN_TYPE(p)) { @@ -1707,7 +1708,7 @@ the modules included by it must also already be included... that's what I thought for a moment, but we can have the following context: -```TODO-lang +```ruby module M end module M2 diff --git a/contextual.md b/contextual.md index 6faa4cf..d7e3afb 100644 --- a/contextual.md +++ b/contextual.md @@ -30,7 +30,7 @@ they are used to separate words. However, Ruby is not an ordinary language and meanings can change significantly depending on the presence of spaces. Here is an example -```TODO-lang +```ruby a[i] = 1 # a[i] = (1) a [i] # a([i]) ``` @@ -41,7 +41,7 @@ parameter. Here is another example. -```TODO-lang +```ruby a + 1 # (a) + (1) a +1 # a(+1) ``` @@ -52,7 +52,7 @@ However, the above examples might give one the impression that only omitting the method call parentheses can be a source of trouble. Let’s look at a different example. -```TODO-lang +```ruby `cvs diff parse.y` # command call string obj.`("cvs diff parse.y") # normal method call ``` @@ -63,7 +63,7 @@ they could be handled quite differently. 
Below is another example where the functioning changes dramatically -```TODO-lang +```ruby print(< + +```ruby # Method call obj.method(arg) # Ruby rb_funcall(obj, rb_intern("method"), 1, arg); # C @@ -368,7 +370,7 @@ VALUE arr = rb_ary_new(); # C ``` It's good because it provides easiness in composing an extension library, and actually -it makes an indispensable prominence of @ruby@. However, it's also a burden for @ruby@ +it makes an indispensable prominence of `ruby`. However, it's also a burden for `ruby` implementation. You can see the affects of it in many places. The affects to GC and thread-processing is eminent. @@ -380,16 +382,16 @@ days, I will omit an explanation about the thread itself. I will start a story i `ruby`'s thread is a user-level thread that is originally written. The characteristic of this implementation is a very high portability in both specification and implementation. Surprisingly a MS-DOS can run the thread. Furthermore, you can expect the same response -in any environment. Many people mention that this point is the best feature of @ruby@. +in any environment. Many people mention that this point is the best feature of `ruby`. -However, as a trade off for such an extremeness of portability, @ruby@ abandons the speed. +However, as a trade off for such an extremeness of portability, `ruby` abandons the speed. It's, say, probably the slowest of all user-level thread implementations in this world. -The tendency of @ruby@ implementation may be seen here the most clearly. +The tendency of `ruby` implementation may be seen here the most clearly. Technique to read source code ============================= -Well. After an introduction of @ruby@, we are about to start reading source code. But wait. +Well. After an introduction of `ruby`, we are about to start reading source code. But wait. Any programmer has to read a source code somewhere, but I guess there are not many occasions that someone teaches you the concrete ways how to read. Why? 
@@ -397,7 +399,7 @@ Does it mean you can naturally read a program if you can write a program? But I can't think reading the program written by other people is so easy. In the same way as writing programs, there must be techniques and theories in reading programs. -And they are necessary. Therefore, before starting to ready @ruby@, I'd like to expand a general +And they are necessary. Therefore, before starting to ready `ruby`, I'd like to expand a general summary of an approach you need to take in reading a source code. ### Principles @@ -431,7 +433,7 @@ where you achieve the goal finally. #### Visualising the goal -Now let us suppose that our final goal is set "Understand all about @ruby@". This is certainly +Now let us suppose that our final goal is set "Understand all about `ruby`". This is certainly considered as "one set goal", but apparently it will not be useful for reading the source code actually. It will not be a trigger of any concrete action. Therefore, your first job will be to drag down the vague goal to the level of a concrete thing. @@ -525,15 +527,15 @@ one of the most powerful tools for creating abstractions in programming is namin but keeping this in mind will make reading much more efficient. Also, we'd like to know about coding rules beforehand to some extent. -For example, in C language, @extern@ function often uses prefix to distinguish the type of functions. +For example, in C language, `extern` function often uses prefix to distinguish the type of functions. And in object-oriented programs, function names sometimes contain the information about where they belong to in prefixes, -and it becomes valuable information (e.g. @rb_str_length@). +and it becomes valuable information (e.g. `rb_str_length`). #### Reading documents Sometimes a document describes the internal structure is included. -Especially be careful of a file named @HACKING@ etc. +Especially be careful of a file named `HACKING` etc. 
#### Reading the directory structure @@ -570,12 +572,12 @@ unconditionally, thus you should be familiar with them at an early stage. If you find both data and code, you should first investigate the data structure. In other words, when exploring code in C, it's better to start with header files. And in this case, let's make the most of our imagination from their filenames. -For example, if you find @frame.h@, it would probably be the stack frame definition. +For example, if you find `frame.h`, it would probably be the stack frame definition. Also, you can understand many things from the member names of a struct and their types. -For example, if you find the member @next@, which points to its own type, then it -will be a linked list. Similarly, when you find members such as @parent@, @children@, -and @sibling@, then it must be a tree structure. When @prev@, it will be a stack. +For example, if you find the member `next`, which points to its own type, then it +will be a linked list. Similarly, when you find members such as `parent`, `children`, +and `sibling`, then it must be a tree structure. When `prev`, it will be a stack. #### Understanding the calling relationship between functions @@ -586,9 +588,9 @@ functions. A tool to visualize the calling relationships is especially called a A text-based tool is sufficient, but it's even better if a tool can generate diagrams. However such tool is seldom available (especially few tools are for free). -When I analyzed @ruby@ to write this book, +When I analyzed `ruby` to write this book, I wrote a small command language and a parser in Ruby and -generated diagrams half-automatically by passing the results to the tool named @graphviz@. +generated diagrams half-automatically by passing the results to the tool named `graphviz`. #### Reading functions @@ -602,7 +604,7 @@ out the codes. What should exactly be cut out? It is hard to understand without seeing the actual example, thus it will be explained in the main part. 
Additionally, when you don't like its coding style, -you can convert it by using the tool like @indent@. +you can convert it by using the tool like `indent`. #### Experimenting by modifying it as you like @@ -633,13 +635,13 @@ itself, please be careful not to be enthusiastic very much. A program often comes with a document which is about the history of changes. For example, if it is a software of GNU, there's always a file named -@ChangeLog@. This is the best resource to know about "the reason why the +`ChangeLog`. This is the best resource to know about "the reason why the program is as it is". Alternatively, when a version control system like CVS or SCCS is used and you -can access it, its utility value is higher than @ChangeLog@. -Taking CVS as an example, @cvs annotate@, which displays the place which -modified a particular line, and @cvs diff@, which takes difference from the +can access it, its utility value is higher than `ChangeLog`. +Taking CVS as an example, `cvs annotate`, which displays the place which +modified a particular line, and `cvs diff`, which takes difference from the specified version, and so on are convenient. Moreover, in the case when there's a mailing list or a news group for @@ -652,13 +654,13 @@ change. Of course, if you can search online, it's also sufficient. Since various tools are available for various purposes, I can't describe them as a whole. -But if I have to choose only one of them, I'd recommend @global@. +But if I have to choose only one of them, I'd recommend `global`. The most attractive point is that its structure allows us to easily use it for -the other purposes. For instance, @gctags@, which comes with it, is actually a +the other purposes. For instance, `gctags`, which comes with it, is actually a tool to create tag files, but you can use it to create a list of the function names contained in a file. 
-```TODO-lang +``` ~/src/ruby % gctags class.c | awk '{print $1}' SPECIAL_SINGLETON SPECIAL_SINGLETON @@ -691,8 +693,8 @@ Build ### Target version -The version of @ruby@ described in this book is 1.7 (2002-09-12). -Regarding @ruby@, +The version of `ruby` described in this book is 1.7 (2002-09-12). +Regarding `ruby`, it is a stable version if its minor version is an even number, and it is a developing version if it is an odd number. Hence, 1.7 is a developing version. @@ -719,7 +721,7 @@ The archive of the target version is included in the attached CD-ROM. In the top directory of the CD-ROM, -```TODO-lang +``` ruby-rhg.tar.gz ruby-rhg.zip ruby-rhg.lzh @@ -728,9 +730,9 @@ ruby-rhg.lzh these three versions are placed, so I'd like you to use whichever one that is convenient for you. Of course, whichever one you choose, the content is the same. -For example, the archive of @tar.gz@ can be extracted as follows. +For example, the archive of `tar.gz` can be extracted as follows. -```TODO-lang +``` ~/src % mount /mnt/cdrom ~/src % gzip -dc /mnt/cdrom/ruby-rhg.tar.gz | tar xf - ~/src % umount /mnt/cdrom @@ -759,9 +761,9 @@ thus I'd like you to read this section for it. When it is a Unix-like OS, because generally it is equipped with a C compiler, by following the below procedures, it can pass in most cases. -Let us suppose @~/src/ruby@ is the place where the source code is extracted. +Let us suppose `~/src/ruby` is the place where the source code is extracted. -```TODO-lang +``` ~/src/ruby % ./configure ~/src/ruby % make ~/src/ruby % su @@ -772,16 +774,16 @@ Let us suppose @~/src/ruby@ is the place where the source code is extracted. Below, I'll describe several points to be careful about. On some platforms like Cygwin, UX/4800, -you need to specify the @--enable-shared@ option at the phase of @configure@, +you need to specify the `--enable-shared` option at the phase of `configure`, or you'd fail to link. 
-@--enable-shared@ is an option to put the most of @ruby@ out of the command -as shared libraries (@libruby.so@). +`--enable-shared` is an option to put the most of `ruby` out of the command +as shared libraries (`libruby.so`). -```TODO-lang +``` ~/src/ruby % ./configure --enable-shared ``` -The detailed tutorial about building is included in @doc/build.html@ of the +The detailed tutorial about building is included in `doc/build.html` of the attached CD-ROM, I'd like you to try as reading it. @@ -807,18 +809,18 @@ Visual C++ 5.0 and later is required. There's probably no problem if it is version 6 or .NET. MinGW or Minimalist GNU for Windows, -it is what the GNU compiling environment (Namely, @gcc@ and @binutils@) +it is what the GNU compiling environment (Namely, `gcc` and `binutils`) is ported on Windows. Cygwin ports the whole UNIX environment. On the contrary, MinGW ports only the tools to compile. Moreover, a program compiled with MinGW does not require any special DLL at -runtime. It means, the @ruby@ compiled with MinGW can be treated completely the +runtime. It means, the `ruby` compiled with MinGW can be treated completely the same as the Visual C++ version. Alternatively, if it is personal use, you can download the version 5.5 of Borland C++ Compiler for free from the site of Boarland. \footnote{The Borland site: http://www.borland.co.jp} -Because @ruby@ started to support this environment fairly recently, +Because `ruby` started to support this environment fairly recently, there's more or less anxiety, but there was not any particular problem on the build test done before the publication of this book. @@ -835,7 +837,7 @@ using MinGW is probably good. Below, I'll explain how to build with Visual C++ and MinGW, but only about the outlines. 
For more detailed explanations and how to build with Borland C++ Compiler, -they are included in @doc/build.html@ of the attached CD-ROM, +they are included in `doc/build.html` of the attached CD-ROM, thus I'd like you to check it when it is necessary. @@ -848,7 +850,7 @@ run Visual C++ itself. Since a batch file for this purpose came with Visual C++, let's execute it first. -```TODO-lang +``` C:\> cd "\Program Files\Microsoft Visual Studio .NET\Vc7\bin" C:\Program Files\Microsoft Visual Studio .NET\Vc7\bin> vcvars32 ``` @@ -857,16 +859,16 @@ C:\Program Files\Microsoft Visual Studio .NET\Vc7\bin> vcvars32 This is the case of Visual C++ .NET. If it is version 6, it can be found in the following place. -```TODO-lang +``` C:\Program Files\Microsoft Visual Studio\VC98\bin\ ``` -After executing @vcvars32@, -all you have to do is to move to the @win32\@ folder of the source tree of -@ruby@ and build. Below, let us suppose the source tree is in @C:\src@. +After executing `vcvars32`, +all you have to do is to move to the `win32\` folder of the source tree of +`ruby` and build. Below, let us suppose the source tree is in `C:\src`. -```TODO-lang +``` C:\> cd src\ruby C:\src\ruby> cd win32 C:\src\ruby\win32> configure @@ -875,17 +877,17 @@ C:\src\ruby\win32> nmake DESTDIR="C:\Program Files\ruby" install ``` -Then, @ruby@ command would be installed in @C:\Program Files\ruby\bin\@, -and Ruby libraries would be in @C:\Program Files\ruby\lib\@. -Because @ruby@ does not use registries and such at all, -you can uninstall it by deleting @C:\Program Files\ruby@ and below. +Then, `ruby` command would be installed in `C:\Program Files\ruby\bin\`, +and Ruby libraries would be in `C:\Program Files\ruby\lib\`. +Because `ruby` does not use registries and such at all, +you can uninstall it by deleting `C:\Program Files\ruby` and below. #### MinGW As described before, MinGW is only an environment to compile, -thus the general UNIX tools like @sed@ or @sh@ are not available. 
-However, because they are necessary to build @ruby@, +thus the general UNIX tools like `sed` or `sh` are not available. +However, because they are necessary to build `ruby`, you need to obtain it from somewhere. For this, there are also two methods: Cygwin and MSYS (Minimal SYStem). @@ -895,23 +897,23 @@ the building contest performed before the publication of this book. On the contrary, in the way of using Cygwin, it can pass very straightforwardly. Therefore, in this book, I'll explain the way of using Cygwin. -First, install MinGW and the entire developing tools by using @setup.exe@ of +First, install MinGW and the entire developing tools by using `setup.exe` of Cygwin. Both Cygwin and MinGW are also included in the attached CD-ROM. \footnote{Cygwin and MinGW……See also doc/win.html of the attached CD-ROM} -After that, all you have to do is to type as follows from @bash@ prompt of Cygwin. +After that, all you have to do is to type as follows from `bash` prompt of Cygwin. -```TODO-lang +``` ~/src/ruby % ./configure --with-gcc='gcc -mno-cygwin' \ --enable-shared i386-mingw32 ~/src/ruby % make ~/src/ruby % make install ``` -That's it. Here the line of @configure@ spans multi-lines but in practice +That's it. Here the line of `configure` spans multi-lines but in practice we'd write it on one line and the backslash is not necessary. -The place to install is @\usr\local\@ and below of the drive on which it is +The place to install is `\usr\local\` and below of the drive on which it is compiled. Because really complicated things occur around here, the explanation -would be fairly long, so I'll explain it comprehensively in @doc/build.html@ of +would be fairly long, so I'll explain it comprehensively in `doc/build.html` of the attached CD-ROM. @@ -920,40 +922,41 @@ the attached CD-ROM. Building Details ================ -Until here, it has been the @README@-like description. +Until here, it has been the `README`-like description. 
This time, let's look at exactly what is done by what we have been done. However, the talks here partially require very high-level knowledge. If you can't understand, I'd like you to skip this and directly jump to the next section. This should be written so that you can understand by coming back after reading the entire book. -Now, on whichever platform, building @ruby@ is separated into three phases. -Namely, @configure@, @make@ and @make install@. -As considering the explanation about @make install@ unnecessary, -I'll explain the @configure@ phase and the @make@ phase. +Now, on whichever platform, building `ruby` is separated into three phases. +Namely, `configure`, `make` and `make install`. +As considering the explanation about `make install` unnecessary, +I'll explain the `configure` phase and the `make` phase. -### @configure@ +### `configure` -First, @configure@. Its content is a shell script, and we detect the system +First, `configure`. Its content is a shell script, and we detect the system parameters by using it. For example, "whether there's the header file -@setjmp.h@" or "whether @alloca()@ is available", these things are checked. +`setjmp.h`" or "whether `alloca()` is available", these things are checked. The way to check is unexpectedly simple. -|_. Target to check |_. Method | -| commands | execute it actually and then check @$?@ | + +| Target to check | Method | +| commands | execute it actually and then check `$?` | | header files | if [ -f $includedir/stdio.h ] | | functions | compile a small program and check whether linking is success | When some differences are detected, somehow it should be reported to us. The way to report is, -the first way is @Makefile@. -If we put a @Makefile.in@ in which parameters are embedded in the form of -`@param@`, it would generate a @Makefile@ in which they are substituted +the first way is `Makefile`. 
+If we put a `Makefile.in` in which parameters are embedded in the form of +`param`, it would generate a `Makefile` in which they are substituted with the actual values. For example, as follows, -```TODO-lang +``` Makefile.in: CFLAGS = @CFLAGS@ ↓ Makefile : CFLAGS = -g -O2 @@ -963,12 +966,13 @@ Makefile : CFLAGS = -g -O2 Alternatively, it writes out the information about, for instance, whether there are certain functions or particular header files, into a header file. Because the output file name can be changed, it is different depending on each -program, but it is @config.h@ in @ruby@. -I'd like you to confirm this file is created after executing @configure@. +program, but it is `config.h` in `ruby`. +I'd like you to confirm this file is created after executing `configure`. Its content is something like this.

config.h

-```TODO-lang + +```c : : #define HAVE_SYS_STAT_H 1 @@ -990,19 +994,20 @@ Its content is something like this. Each meaning is easy to understand. -@HAVE_xxxx_H@ probably indicates whether a certain header file exists, -@SIZEOF_SHORT@ must indicate the size of the @short@ type of C. -Likewise, @SIZEOF_INT@ indicates the byte length of @int@, -@HAVE_OFF_T@ indicates whether the @offset_t@ type is defined or not. +`HAVE_xxxx_H` probably indicates whether a certain header file exists, +`SIZEOF_SHORT` must indicate the size of the `short` type of C. +Likewise, `SIZEOF_INT` indicates the byte length of `int`, +`HAVE_OFF_T` indicates whether the `offset_t` type is defined or not. As we can understand from the above things, -@configure@ does detect the differences but it does not automatically absorb the +`configure` does detect the differences but it does not automatically absorb the differences. Bridging the difference is left to each programmer. For example, as follows, ▼ A typical usage of the `HAVE_` macro -```TODO-lang + +```c 24 #ifdef HAVE_STDLIB_H 25 # include 26 #endif @@ -1014,28 +1019,28 @@ For example, as follows, -### @autoconf@ +### `autoconf` -@configure@ is not a `ruby`-specific tool. +`configure` is not a `ruby`-specific tool. Whether there are functions, there are header files, ... it is obvious that these tests have regularity. It is wasteful if each person who writes a program wrote each own distinct tool. -Here a tool named @autoconf@ comes in. -In the files named @configure.in@ or @configure.ac@, +Here a tool named `autoconf` comes in. +In the files named `configure.in` or `configure.ac`, write about "I'd like to do these checks", -process it with @autoconf@, -then an adequate @configure@ would be generated. -The @.in@ of @configure.in@ is probably an abbreviation of @input@. -It's the same as the relationship between @Makefile@ and @Makefile.in@. -@.ac@ is, of course, an abbreviation of @AutoConf@. 
+process it with `autoconf`, +then an adequate `configure` would be generated. +The `.in` of `configure.in` is probably an abbreviation of `input`. +It's the same as the relationship between `Makefile` and `Makefile.in`. +`.ac` is, of course, an abbreviation of `AutoConf`. To illustrate this talk up until here, it would be like Figure 1.
- figure 1: The process until @Makefile@ is created -
figure 1: The process until @Makefile@ is created
+ figure 1: The process until `Makefile` is created +
figure 1: The process until `Makefile` is created
For the readers who want to know more details, @@ -1043,51 +1048,51 @@ I recommend "GNU Autoconf/Automake/Libtool" Gary V.Vaughan, Ben Elliston, Tom Tromey, Ian Lance Taylor. By the way, `ruby`'s `configure` is, as said before, generated by using -@autoconf@, but not all the @configure@ in this world are generated with -@autoconf@. It can be written by hand or another tool to automatically generate -can be used. Anyway, it's sufficient if ultimately there are @Makefile@ and -@config.h@ and many others. +`autoconf`, but not all the `configure` in this world are generated with +`autoconf`. It can be written by hand or another tool to automatically generate +can be used. Anyway, it's sufficient if ultimately there are `Makefile` and +`config.h` and many others. -### @make@ +### `make` -At the second phase, @make@, what is done? -Of course, it would compile the source code of @ruby@, -but when looking at the output of @make@, +At the second phase, `make`, what is done? +Of course, it would compile the source code of `ruby`, +but when looking at the output of `make`, I feel like there are many other things it does. I'll briefly explain the process of it. -* compile the source code composing @ruby@ itself -* create the static library @libruby.a@ gathering the crucial parts of @ruby@ -* create "@miniruby@", which is an always statically-linked @ruby@ -* create the shared library @libruby.so@ when @--enable-shared@ -* compile the extension libraries (under @ext/@) by using @miniurby@ -* At last, generate the real @ruby@ +* compile the source code composing `ruby` itself +* create the static library `libruby.a` gathering the crucial parts of `ruby` +* create `miniruby`, which is an always statically-linked `ruby` +* create the shared library `libruby.so` when `--enable-shared` +* compile the extension libraries (under `ext/`) by using `miniurby` +* At last, generate the real `ruby` -There are two reasons why it creates @miniruby@ and @ruby@ separately. 
-The first one is that compiling the extension libraries requires @ruby@. -In the case when @--enable-shared@, @ruby@ itself is dynamically linked, +There are two reasons why it creates `miniruby` and `ruby` separately. +The first one is that compiling the extension libraries requires `ruby`. +In the case when `--enable-shared`, `ruby` itself is dynamically linked, thus there's a possibility not be able to run instantly because of the load -paths of the libraries. Therefore, create @miniruby@, which is statically +paths of the libraries. Therefore, create `miniruby`, which is statically linked, and use it during the building process. The second reason is, in a platform where we cannot use shared libraries, -there's a case when the extension libraries are statically linked to @ruby@ -itself. In this case, it cannot create @ruby@ before compiling all extension -libraries, but the extension libraries cannot be compiled without @ruby@. -In order to resolve this dilemma, it uses @miniruby@. +there's a case when the extension libraries are statically linked to `ruby` +itself. In this case, it cannot create `ruby` before compiling all extension +libraries, but the extension libraries cannot be compiled without `ruby`. +In order to resolve this dilemma, it uses `miniruby`. -@CVS@ +`CVS` ===== -The @ruby@ archive included in the attached CD-ROM is, +The `ruby` archive included in the attached CD-ROM is, as the same as the official release package, -just a snapshot which is an appearance at just a particular moment of @ruby@, +just a snapshot which is an appearance at just a particular moment of `ruby`, which is a continuously changing program. -How @ruby@ has been changed, why it has been so, these things are not described +How `ruby` has been changed, why it has been so, these things are not described there. Then what is the way to see the entire picture including the past. We can do it by using CVS. 
@@ -1103,7 +1108,7 @@ system and CVS is the most famous open-source source code management system in this world. -Since @ruby@ is also managed with CVS, +Since `ruby` is also managed with CVS, I'll explain a little about the mechanism and usage of CVS. First, the most important idea of CVS is repository and working-copy. I said CVS is something like an undo list of editor, @@ -1177,16 +1182,16 @@ Next it would be 1.3 then 1.4. Keeping in mind the above things, I'll talk about the usage of CVS very very briefly. -First, @cvs@ command is essential, so I'd like you to install it beforehand. -The source code of @cvs@ is included in the attached CD-ROM +First, `cvs` command is essential, so I'd like you to install it beforehand. +The source code of `cvs` is included in the attached CD-ROM \footnote{cvsarchives/cvs-1.11.2.tar.gz}. -How to install @cvs@ is really far from the main line, +How to install `cvs` is really far from the main line, thus it won't be explained here. -After installing it, let's checkout the source code of @ruby@ as an experiment. +After installing it, let's checkout the source code of `ruby` as an experiment. Type the following commands when you are online. -```TODO-lang +``` % cvs -d :pserver:anonymous@cvs.ruby-lang.org:/src login CVS Password: anonymous % cvs -d :pserver:anonymous@cvs.ruby-lang.org:/src checkout ruby @@ -1195,33 +1200,33 @@ CVS Password: anonymous Any options were not specified, thus the edge version would be automatically checked out. -The truly edge version of @ruby@ must appear under @ruby/@. +The truly edge version of `ruby` must appear under `ruby/`. Additionally, if you'd like to obtain the version of a certain day, -you can use @-D@ option of @cvs checkout@. +you can use `-D` option of `cvs checkout`. By typing as follows, you can obtain a working copy of the version which is being explained by this book. 
-```TODO-lang +``` % cvs -d :pserver:anonymous@cvs.ruby-lang.org:/src checkout -D2002-09-12 ruby ``` -At this moment, you have to write options immediately after @checkout@. -If you wrote "@ruby@" first, it would cause a strange error complaining "missing +At this moment, you have to write options immediately after `checkout`. +If you wrote `"ruby"` first, it would cause a strange error complaining "missing a module". And, with the anonymous access like this example, we cannot check in. In order to practice checking in, it's good to create a (local) repository and store a "Hello, World!" program in it. The concrete way to store is not explained here. -The manual coming with @cvs@ is fairly friendly. +The manual coming with `cvs` is fairly friendly. Regarding books which you can read in Japanese, I recommend translated "Open Source Development with CVS" Karl Fogel, Moshe Bar. -The composition of @ruby@ +The composition of `ruby` ========================= ### The physical structure @@ -1231,17 +1236,17 @@ but what is the thing we should do first? It is looking over the directory structure. In most cases, the directory structure, meaning the source tree, directly indicate the module structure of the program. -Abruptly searching @main()@ by using @grep@ and reading from the top in its +Abruptly searching `main()` by using `grep` and reading from the top in its processing order is not smart. -Of course finding out @main()@ is also important, -but first let's take time to do @ls@ or @head@ to grasp the whole picture. +Of course finding out `main()` is also important, +but first let's take time to do `ls` or `head` to grasp the whole picture. Below is the appearance of the top directory immediately after checking out from the CVS repository. What end with a slash are subdirectories. 
-```TODO-lang +``` COPYING compar.c gc.c numeric.c sample/ COPYING.ja config.guess hash.c object.c signal.c CVS/ config.sub inits.c pack.c sprintf.c @@ -1265,15 +1270,15 @@ class.c file.c node.h rubytest.rb Recently the size of a program itself has become larger, and there are many softwares whose subdirectories are divided into pieces, -but @ruby@ has been consistently used the top directory for a long time. +but `ruby` has been consistently used the top directory for a long time. It becomes problematic if there are too many files, but we can get used to this amount. The files at the top level can be categorized into six: * documents -* the source code of @ruby@ itself -* the tool to build @ruby@ +* the source code of `ruby` itself +* the tool to build `ruby` * standard extension libraries * standard Ruby libraries * the others @@ -1283,15 +1288,15 @@ The source code and the build tool are obviously important. Aside from them, I'll list up what seems useful for us. -* @ChangeLog@ +* `ChangeLog` -The records of changes on @ruby@. +The records of changes on `ruby`. This is very important when investigating the reason of a certain change. -* @README.EXT README.EXT.ja@ +* `README.EXT README.EXT.ja` How to create an extension library is described, -but in the course of it, things relating to the implementation of @ruby@ itself +but in the course of it, things relating to the implementation of `ruby` itself are also written. @@ -1299,97 +1304,100 @@ are also written. ### Dissecting Source Code -From now on, I'll further split the source code of @ruby@ itself into more tiny -pieces. As for the main files, its categorization is described in @README.EXT@, +From now on, I'll further split the source code of `ruby` itself into more tiny +pieces. As for the main files, its categorization is described in `README.EXT`, thus I'll follow it. Regarding what is not described, I categorized it by myself. 
#### Ruby Language Core -| @class.c@ | class relating API | -| @error.c@ | exception relating API | -| @eval.c@ | evaluator | -| @gc.c@ | garbage collector | -| @lex.c@ | reserved word table | -| @object.c@ | object system | -| @parse.y@ | parser | -| @variable.c@ | constants, global variables, class variables | -| @ruby.h@ | The main macros and prototypes of @ruby@ | -| @intern.h@ | the prototypes of C API of @ruby@. -@intern@ seems to be an abbreviation of internal, but the functions written here -can be used from extension libraries. | -| @rubysig.h@ | the header file containing the macros relating to signals | -| @node.h@ | the definitions relating to the syntax tree nodes | -| @env.h@ | the definitions of the structs to express the context of the evaluator | - - -The parts to compose the core of the @ruby@ interpreter. +| File | Description | +| ------------ | ---------------------------------------------------------------------- | +| `class.c` | class relating API | +| `error.c` | exception relating API | +| `eval.c` | evaluator | +| `gc.c` | garbage collector | +| `lex.c` | reserved word table | +| `object.c` | object system | +| `parse.y` | parser | +| `variable.c` | constants, global variables, class variables | +| `ruby.h` | The main macros and prototypes of `ruby` | +| `intern.h` | the prototypes of C API of `ruby`. `intern` seems to be an abbreviation of internal, but the functions written here can be used from extension libraries. | +| `rubysig.h` | the header file containing the macros relating to signals | +| `node.h` | the definitions relating to the syntax tree nodes | +| `env.h` | the definitions of the structs to express the context of the evaluator | + + +The parts to compose the core of the `ruby` interpreter. The most of the files which will be explained in this book are contained here. -If you consider the number of the files of the entire @ruby@, +If you consider the number of the files of the entire `ruby`, it is really only a few. 
But if you think based on the byte size, 50% of the entire amount is occupied by these files. -Especially, @eval.c@ is 200KB, @parse.y@ is 100KB, these files are large. +Especially, `eval.c` is 200KB, `parse.y` is 100KB, these files are large. #### Utility -| dln.c | dynamic loader | -| regex.c | regular expression engine | -| st.c | hash table | -| util.c | libraries for radix conversions and sort and so on | +| File | Description | +| --------- | -------------------------------------------------- | +| `dln.c` | dynamic loader | +| `regex.c` | regular expression engine | +| `st.c` | hash table | +| `util.c` | libraries for radix conversions and sort and so on | -It means utility for @ruby@. +It means utility for `ruby`. However, some of them are so large that you cannot imagine it from the word -"utility". For instance, @regex.c@ is 120 KB. +"utility". For instance, `regex.c` is 120 KB. -#### Implementation of @ruby@ command +#### Implementation of `ruby` command -| @dmyext.c@ | dummy of the routine to initialize extension libraries ( DumMY EXTension ) | -| @inits.c@ | the entry point for core and the routine to initialize -extension libraries | -| @main.c@ | the entry point of @ruby@ command (this is unnecessary for -@libruby@ ) | -| @ruby.c@ | the main part of @ruby@ command (this is also necessary for -@libruby@ ) | -| @version.c@ | the version of @ruby@ | +| File | Description | +| ------------------- | -------------------------------------------------------------------------- | +| `dmyext.c` | dummy of the routine to initialize extension libraries ( DumMY EXTension ) | +| `inits.c` | the entry point for core and the routine to initialize extension libraries | +| `main.c` | the entry point of `ruby` command (this is unnecessary for `libruby` ) | +| `ruby.c` | the main part of `ruby` command (this is also necessary for `libruby` ) | +| `version.c` | the version of `ruby` | -The implementation of @ruby@ command, -which is of when typing @ruby@ on the command 
line and execute it. +The implementation of `ruby` command, +which is of when typing `ruby` on the command line and execute it. This is the part, for instance, to interpret the command line options. -Aside from @ruby@ command, as the commands utilizing @ruby@ core, -there are @mod_ruby@ and @vim@. -These commands are functioning by linking to the @libruby@ library -(@.a@/@.so@/@.dll@ and so on). +Aside from `ruby` command, as the commands utilizing `ruby` core, +there are `mod_ruby` and `vim`. +These commands are functioning by linking to the `libruby` library +(`.a`/`.so`/`.dll` and so on). #### Class Libraries -| @array.c@ | @class Array@ | -| @bignum.c@ | @class Bignum@ | -| @compar.c@ | @module Comparable@ | -| @dir.c@ | @class Dir@ | -| @enum.c@ | @module Enumerable@ | -| @file.c@ | @class File@ | -| @hash.c@ | @class Hash@ (Its actual body is @st.c@) | -| @io.c@ | @class IO@ | -| @marshal.c@ | @module Marshal@ | -| @math.c@ | @module Math@ | -| @numeric.c@ | @class Numeric@, @Integer@, @Fixnum@, @Float@ | -| @pack.c@ | @Array#pack@, @String#unpack@ | -| @prec.c@ | @module Precision@ | -| @process.c@ | @module Process@ | -| @random.c@ | @Kernel#srand()@, @rand()@ | -| @range.c@ | @class Range@ | -| @re.c@ | @class Regexp@ (Its actual body is @regex.c@) | -| @signal.c@ | @module Signal@ | -| @sprintf.c@ | @ruby@-specific @sprintf()@ | -| @string.c@ | @class String@ | -| @struct.c@ | @class Struct@ | -| @time.c@ | @class Time@ | +| File | Ruby equivalent | +| ----------- | --------------------------------------------- | +| `array.c` | `class Array` | +| `bignum.c` | `class Bignum` | +| `compar.c` | `module Comparable` | +| `dir.c` | `class Dir` | +| `enum.c` | `module Enumerable` | +| `file.c` | `class File` | +| `hash.c` | `class Hash` (Its actual body is `st.c`) | +| `io.c` | `class IO` | +| `marshal.c` | `module Marshal` | +| `math.c` | `module Math` | +| `numeric.c` | `class Numeric`, `Integer`, `Fixnum`, `Float` | +| `pack.c` | `Array#pack`, `String#unpack` 
| +| `prec.c` | `module Precision` | +| `process.c` | `module Process` | +| `random.c` | `Kernel#srand()`, `rand()` | +| `range.c` | `class Range` | +| `re.c` | `class Regexp` (Its actual body is `regex.c`) | +| `signal.c` | `module Signal` | +| `sprintf.c` | `ruby`-specific `sprintf()` | +| `string.c` | `class String` | +| `struct.c` | `class Struct` | +| `time.c` | `class Time` | The implementations of the Ruby class libraries. What listed here are basically implemented in the completely same way as the @@ -1400,25 +1408,27 @@ examples of how to write an extension library. #### Files depending on a particular platform -| @bcc32/@ | Borland C++ (Win32) | -| @beos/@ | BeOS | -| @cygwin/@ | Cygwin (the UNIX simulation layer on Win32) | -| @djgpp/@ | djgpp (the free developing environment for DOS) | -| @vms/@ | VMS (an OS had been released by DEC before) | -| @win32/@ | Visual C++ (Win32) | -| @x68/@ | Sharp X680x0 series (OS is Human68k) | +| Folder | Description | +| --------- | ----------------------------------------------- | +| `bcc32/` | Borland C++ (Win32) | +| `beos/` | BeOS | +| `cygwin/` | Cygwin (the UNIX simulation layer on Win32) | +| `djgpp/` | djgpp (the free developing environment for DOS) | +| `vms/` | VMS (an OS had been released by DEC before) | +| `win32/` | Visual C++ (Win32) | +| `x68/` | Sharp X680x0 series (OS is Human68k) | Each platform-specific code is stored. #### fallback functions -```TODO-lang +``` missing/ ``` Files to offset the functions which are missing on each platform. -Mainly functions of @libc@. +Mainly functions of `libc`. @@ -1451,10 +1461,10 @@ Part 1 is from chapter 2 to chapter 7. The second one is parser. Probably some preliminary explanations are necessary for this. -@ruby@ command is the interpreter of Ruby language. +`ruby` command is the interpreter of Ruby language. It means that it analyzes the input which is a text on invocation and executes it by following it. 
-Therefore, @ruby@ needs to be able to interpret the meaning of the program +Therefore, `ruby` needs to be able to interpret the meaning of the program written as a text, but unfortunately text is very hard to understand for computers. For computers, text files are merely byte sequences and nothing more than that. In order to comprehend the meaning of text from it, some special gimmick @@ -1464,16 +1474,16 @@ which can be easily handled from the program. The internal expression is called "syntax tree". Syntax tree expresses a program by a tree structure, -for instance, figure 4 shows how an @if@ statement is expressed. +for instance, figure 4 shows how an `if` statement is expressed.
- figure 4: an @if@ statement and its corresponding syntax tree -
figure 4: an @if@ statement and its corresponding syntax tree
+ figure 4: an `if` statement and its corresponding syntax tree +
figure 4: an `if` statement and its corresponding syntax tree
Parser will be described in Part 2 "Syntactic Analysis". Part 2 is from chapter 10 to chapter 12. -Its target file is only @parse.y@. +Its target file is only `parse.y`. #### Evaluator @@ -1487,17 +1497,17 @@ However, the third one, evaluator, this is completely elusive. What evaluator does is "executing" a program by following a syntax tree. This sounds easy, but what is "executing"? To answer this question precisely is fairly difficult. -What is "executing an @if@ statement"? -What is "executing a @while@ statement"? +What is "executing an `if` statement"? +What is "executing a `while` statement"? What does "assigning to a local variable" mean? We cannot understand evaluator without answering all of such questions clearly and precisely. In this book, evaluator will be discussed in Part 3 "Evaluate". -Its target file is @eval.c@. -@eval@ is an abbreviation of "evaluator". +Its target file is `eval.c`. +`eval` is an abbreviation of "evaluator". -Now, I've described briefly about the structure of @ruby@, +Now, I've described briefly about the structure of `ruby`, however even though the ideas were explained, it does not so much help us understand the behavior of program. -In the next chapter, we'll start with actually using @ruby@. +In the next chapter, we'll start with actually using `ruby`. diff --git a/iterator.md b/iterator.md index d27a680..fcea399 100644 --- a/iterator.md +++ b/iterator.md @@ -25,7 +25,7 @@ First, let's think about a small program as below:

▼The Source Program

-```TODO-lang +```ruby iter_method() do 9 # a mark to find this block end diff --git a/load.md b/load.md index b314f31..8db93e1 100644 --- a/load.md +++ b/load.md @@ -603,6 +603,7 @@ non essential things have already been removed.

▼ `load_file()` (simplified edition)

+ ```TODO-lang static void load_file(fname, /* script=0 */) diff --git a/parser.md b/parser.md index cb1bc50..707d0d4 100644 --- a/parser.md +++ b/parser.md @@ -38,7 +38,7 @@ Let's now look at `parse.y` in a bit more detail. The following figure presents a rough outline of the contents of `parse.y`. ▼ parse.y -```TODO-lang +```yacc %{ header %} @@ -119,7 +119,7 @@ modified version of `exyacc.rb`\footnote{modified `exyacc.rb`:`tools/exyacc2.rb` located on the attached CD-ROM} to extract the grammar rules. ▼ `parse.y`(rules) -```TODO-lang +```yacc program : compstmt bodystmt : compstmt @@ -200,7 +200,7 @@ With adding `arg` to them, let's look at their rules. ▼ `ruby` grammar (outline) -```TODO-lang +```yacc program : compstmt compstmt : stmts opt_terms @@ -248,7 +248,7 @@ we can clearly make out a hierarchy of `program`→`stmt`→`expr`→`arg`→ Also, we'd like to focus on this rule of `primary`. -```TODO-lang +```yacc primary : literal : : @@ -269,7 +269,7 @@ reduced during parsing. The next rule is also particularly interesting. -```TODO-lang +```yacc primary : literal : : @@ -290,7 +290,7 @@ function, be used as the right hand side of an expression etc. This is an incredible fact. Let's actually confirm it. -```TODO-lang +```ruby p((class C; end)) p((def a() end)) p((alias ali gets)) @@ -301,7 +301,7 @@ p((1 + 1 * 1 ** 1 - 1 / 1 ^ 1)) If we invoke `ruby` with the `-c` option (syntax check), we get the following output. -```TODO-lang +``` % ruby -c primprog.rb Syntax OK ``` @@ -325,7 +325,7 @@ one. ### `program` ▼ `program` -```TODO-lang +```yacc program : compstmt compstmt : stmts opt_terms @@ -347,7 +347,7 @@ terminate the sentences, such as semicolons or newlines. `opt_terms` means "OPTional terms". 
The definitions are as follows: ▼ `opt_terms` -```TODO-lang +```yacc opt_terms : | terms @@ -360,7 +360,7 @@ term : ';' The initial `;` or `\n` of a `terms` can be followed by any number of `;` only; based on that, you might start thinking that if there are 2 or more consecutive newlines, it could cause a problem. Let's try and see what actually happens. -```TODO-lang +```ruby 1 + 1 # first newline # second newline # third newline @@ -369,7 +369,7 @@ The initial `;` or `\n` of a `terms` can be followed by any number of `;` only; Run that with `ruby -c`. -```TODO-lang +``` % ruby -c optterms.rb Syntax OK ``` @@ -385,7 +385,7 @@ To generalize this point, the grammar rules can be divided into 2 groups: those Next is `stmt`. This one is rather involved, so we'll look into it a bit at a time. ▼ `stmt`(1) -```TODO-lang +```yacc stmt : kALIAS fitem fitem | kALIAS tGVAR tGVAR | kALIAS tGVAR tBACK_REF @@ -407,7 +407,7 @@ Looking at that, somehow things start to make sense. The first few have `alias`, As explained earlier, `klBEGIN` and `klEND` represent `BEGIN` and `END`. ▼ `stmt`(2) -```TODO-lang +```yacc | lhs '=' command_call | mlhs '=' command_call | var_lhs tOP_ASGN command_call @@ -439,7 +439,8 @@ This abbreviation is used at a lot of places in the source code of `ruby`.

▼ `stmt` (3)

-```TODO-lang + +```yacc | lhs '=' mrhs_basic | mlhs '=' mrhs ``` @@ -451,7 +452,8 @@ We've come to recognize that knowing the meanings of names makes the comprehensi

▼ `stmt` (4)

-```TODO-lang + +```yacc | expr ``` @@ -465,7 +467,8 @@ Lastly, it joins to `expr`.

▼ `expr`

-```TODO-lang + +```yacc expr : kRETURN call_args | kBREAK call_args | kNEXT call_args @@ -491,7 +494,7 @@ it would cause conflicts tremendously. However, these two below are of different kind. -```TODO-lang +```yacc expr kAND expr expr kOR expr ``` @@ -505,7 +508,7 @@ at least they need to be `expr` to go well. For example, the following usage is possible ... -```TODO-lang +```ruby valid_items.include? arg or raise ArgumentError, 'invalid arg' # valid_items.include?(arg) or raise(ArgumentError, 'invalid arg') ``` @@ -515,21 +518,18 @@ However, if the rule of `kOR` existed in `arg` instead of `expr`, it would be joined as follows. -```TODO-lang +```ruby valid_items.include?((arg or raise)) ArgumentError, 'invalid arg' ``` - Obviously, this would end up a parse error. - - - ### `arg`

▼ `arg`

-```TODO-lang + +```yacc arg : lhs '=' arg | var_lhs tOP_ASGN arg | primary_value '[' aref_args ']' tOP_ASGN arg @@ -584,7 +584,7 @@ this is virtually only a mere enumeration. Let's cut the "mere enumeration" out from the `arg` rule by merging. -```TODO-lang +```yacc arg: lhs '=' arg /* 1 */ | primary T_opeq arg /* 2 */ | arg T_infix arg /* 3 */ @@ -637,7 +637,8 @@ We could say it's a matter of course. Because `primary` has a lot of grammar rules, we'll split them up and show them in parts.

▼ `primary` (1)

-```TODO-lang + +```yacc primary : literal | strings | xstring @@ -652,7 +653,8 @@ Literals.

▼ `primary` (2)

-```TODO-lang + +```yacc | var_ref | backref | tFID @@ -668,7 +670,8 @@ even if it appears solely, it becomes a method call at the parser level.

▼ `primary` (3)

-```TODO-lang + +```yacc | kBEGIN bodystmt kEND @@ -679,7 +682,8 @@ It means this is the `begin` of the exception control.

▼ `primary` (4)

-```TODO-lang + +```yacc | tLPAREN_ARG expr ')' | tLPAREN compstmt ')' ``` @@ -688,7 +692,8 @@ This has already described. Syntax demoting.

▼ `primary` (5)

-```TODO-lang + +```yacc | primary_value tCOLON2 tCONSTANT | tCOLON3 cname ``` @@ -717,7 +722,8 @@ it becomes `tCOLON3`. In the other cases, it becomes `tCOLON2`.

▼ `primary` (6)

-```TODO-lang + +```yacc | primary_value '[' aref_args ']' ``` @@ -725,7 +731,8 @@ Index-form calls, for instance, `arr[i]`.

▼ `primary` (7)

-```TODO-lang + +```yacc | tLBRACK aref_args ']' | tLBRACE assoc_list '}' ``` @@ -750,7 +757,8 @@ so I'd like you to make use of it when reading.

▼ `primary` (8)

-```TODO-lang + +```yacc | kRETURN | kYIELD '(' call_args ')' | kYIELD '(' ')' @@ -770,7 +778,7 @@ it does not mean you cannot pass values, of course. There was the following rule in `expr`. -```TODO-lang +```yacc kRETURN call_args ``` @@ -794,7 +802,8 @@ the next chapter "Finite-State Scanner".

▼ `primary` (9)

-```TODO-lang + +```yacc | operation brace_block | method_call | method_call brace_block @@ -812,7 +821,8 @@ the next chapter "Finite-State Scanner".

▼ `primary` (10)

-```TODO-lang + +```yacc | kIF expr_value then compstmt if_tail kEND # if | kUNLESS expr_value then compstmt opt_else kEND # unless | kWHILE expr_value do compstmt kEND # while @@ -847,7 +857,8 @@ In the next section, we'll think about this point again.

▼ `primary` (11)

-```TODO-lang + +```yacc | kCLASS cname superclass bodystmt kEND # class definition | kCLASS tLSHFT expr term bodystmt kEND # singleton class definition | kMODULE cname bodystmt kEND # module definition @@ -866,7 +877,8 @@ it would never be a problem.

▼ `primary` (12)

-```TODO-lang + +```yacc | kBREAK | kNEXT | kREDO @@ -891,7 +903,7 @@ but explaining instinctively is relatively easy. Here, let's simulate with a small rule defined as follows: -```TODO-lang +```yacc %token A B o %% element : A item_list B @@ -915,7 +927,7 @@ For an element of the list, the `o` or `element` is nesting. With the parser based on this grammar, let's try to parse the following input. -```TODO-lang +``` A A o o o B o A o A o o o B o B B ``` @@ -936,7 +948,7 @@ Figure 4 shows the consequence. However, if the ending `B` is missing, ... -```TODO-lang +```yacc %token A o %% element : A item_list /* B is deleted for an experiment */ @@ -955,7 +967,7 @@ If we simply take `B` out from the previous one, The input would be as follows. -```TODO-lang +``` A A o o o o A o A o o o o ``` @@ -1026,33 +1038,24 @@ by `yyparse()`. The rest functions in `parse.y` are auxiliary functions called by `yylex()`, and these can also be clearly categorized. - First, the input buffer is at the lowest level of the scanner. `ruby` is designed so that you can input source programs via both Ruby `IO` objects and strings. The input buffer hides that and makes it look like a single byte stream. - The next level is the token buffer. It reads 1 byte at a time from the input buffer, and keeps them until it will form a token. - Therefore, the whole structure of `yylex` can be depicted as Figure 7. -
figure 7: The whole picture of the scanner
figure 7: The whole picture of the scanner
- - - - ### The input buffer - Let's start with the input buffer. Its interfaces are only the three: `nextc()`, `pushback()`, `peek()`. @@ -1062,7 +1065,8 @@ The variables used by the input buffer are the followings:

▼ the input buffer

-```TODO-lang + +```yacc 2279 static char *lex_pbeg; 2280 static char *lex_p; 2281 static char *lex_pend; @@ -1091,6 +1095,7 @@ First, I'll start with `nextc()` that seems the most orthodox.

▼ `nextc()`

+ ```TODO-lang 2468 static inline int 2469 nextc() @@ -1144,7 +1149,7 @@ it's definite that each line comes in at a time. Here is the summary: -```TODO-lang +``` if ( reached the end of the buffer ) if (still there's more input) read the next line @@ -1161,7 +1166,8 @@ The variables used by this function are shown together in the following.

▼ `lex_getline()`

-```TODO-lang + +```yacc 2276 static VALUE (*lex_gets)(); /* gets function */ 2277 static VALUE lex_input; /* non-nil if File */ @@ -1186,6 +1192,7 @@ I searched the place where setting `lex_gets` and this is what I found:

▼ set `lex_gets`

+ ```TODO-lang 2430 NODE* 2431 rb_compile_string(f, s, line) @@ -1220,6 +1227,7 @@ On the other hand, `lex_get_str()` is defined as follows:

▼ `lex_get_str()`

+ ```TODO-lang 2398 static int lex_gets_ptr; @@ -1270,6 +1278,7 @@ we can understand the rest easily. `pushback()` writes back a character. If put it in C, it is `ungetc()`.

▼ `pushback()`

+ ```TODO-lang 2501 static void 2502 pushback(c) @@ -1290,6 +1299,7 @@ we can understand the rest easily.

▼ `peek()`

+ ```TODO-lang 2509 #define peek(c) (lex_p != lex_pend && (c) == *lex_p) @@ -1318,6 +1328,7 @@ There are the five interfaces as follows: Now, we'll start with the data structures.

▼ The Token Buffer

+ ```TODO-lang 2271 static char *tokenbuf = NULL; 2272 static int tokidx, toksiz = 0; @@ -1344,6 +1355,7 @@ read `newtok()`, which starts a new token.

▼ `newtok()`

+ ```TODO-lang 2516 static char* 2517 newtok() @@ -1381,6 +1393,7 @@ Next, let's look at the `tokadd()` to add a character to token buffer.

▼ `tokadd()`

+ ```TODO-lang 2531 static void 2532 tokadd(c) @@ -1407,6 +1420,7 @@ as `calloc()`. The rest interfaces are summarized below.

▼ `tokfix() tok() toklen() toklast()`

+ ```TODO-lang 2511 #define tokfix() (tokenbuf[tokidx]='\0') 2512 #define tok() tokenbuf @@ -1431,6 +1445,7 @@ First, I'll show the whole structure that some parts of it are left out.

▼ `yylex` outline

+ ```TODO-lang 3106 static int 3107 yylex() @@ -1518,6 +1533,7 @@ Let's start with what is simple first.

▼ `yylex` - `'!'`

+ ```TODO-lang 3205 case '!': 3206 lex_state = EXPR_BEG; @@ -1578,6 +1594,7 @@ Next, we'll try to look at `'<'` as an example of using `yylval` (the value of a

▼ `yylex`−`'>'`

+ ```TODO-lang 3296 case '>': 3297 switch (lex_state) { @@ -1639,6 +1656,7 @@ The code of `':'` shown below is an example that a space changes the behavior.

▼ `yylex`−`':'`

+ ```TODO-lang 3761 case ':': 3762 c = nextc(); @@ -1708,6 +1726,7 @@ This is relatively long, so I'll show it with comments.

▼ `yylex` -- identifiers

+ ```TODO-lang 4081 case '@': /* an instance variable or a class variable */ 4082 c = nextc(); @@ -1868,6 +1887,7 @@ The definition of `struct kwtable` is as follows:

▼ `kwtable`

+ ```TODO-lang 1 struct kwtable {char *name; int id[2]; enum lex_state state;}; @@ -1885,6 +1905,7 @@ This is the place where actually looking up.

▼ `yylex()` -- identifier -- call `rb_reserved_word()`

+ ```TODO-lang 4173 struct kwtable *kw; 4174 @@ -1905,6 +1926,7 @@ The double quote (`"`) part of `yylex()` is this.

▼ `yylex` − `'"'`

+ ```TODO-lang 3318 case '"': 3319 lex_strterm = NEW_STRTERM(str_dquote, '"', 0); @@ -1920,6 +1942,7 @@ Then, this time, when taking a look at the rule,

▼ rules for strings

+ ```TODO-lang string1 : tSTRING_BEG string_contents tSTRING_END @@ -1983,6 +2006,7 @@ What plays an important role there is ...

▼ the beginning of `yylex()`

+ ```TODO-lang 3106 static int 3107 yylex() @@ -2015,6 +2039,7 @@ This is done in the following part:

▼ `string_content`

+ ```TODO-lang 1916 string_content : .... 1917 | tSTRING_DBEG term_push @@ -2073,6 +2098,7 @@ First, let's look at its type.

▼ `lex_strterm`

+ ```TODO-lang 72 static NODE *lex_strterm; @@ -2089,6 +2115,7 @@ you should remember only these two points.

▼ `NEW_STRTERM()`

+ ```TODO-lang 2865 #define NEW_STRTERM(func, term, paren) \ 2866 rb_node_newnode(NODE_STRTERM, (func), (term), (paren)) @@ -2120,6 +2147,7 @@ The available types are decided as follows:

▼ `func`

+ ```TODO-lang 2775 #define STR_FUNC_ESCAPE 0x01 /* backslash notations such as \n are in effect */ 2776 #define STR_FUNC_EXPAND 0x02 /* embedded expressions are in effect */ @@ -2160,6 +2188,7 @@ The rest is reading `yylex()` in the string mode, in other words, the `if` at the beginning.

▼ `yylex`− string

+ ```TODO-lang 3114 if (lex_strterm) { 3115 int token; @@ -2212,6 +2241,7 @@ First, I'll show the code of `yylex()` to scan the starting symbol of a here doc

▼ `yylex`−`'<'`

+ ```TODO-lang 3260 case '<': 3261 c = nextc(); @@ -2236,6 +2266,7 @@ Therefore, here is `heredoc_identifier()`.

▼ `heredoc_identifier()`

+ ```TODO-lang 2926 static int 2927 heredoc_identifier() @@ -2294,6 +2325,7 @@ Notice that `lex_strterm` remains unchanged after it was set at `heredoc_identif

▼ `here_document()`(simplified)

+ ```TODO-lang here_document(NODE *here) { @@ -2338,6 +2370,7 @@ And finally, leaving the `do` ~ `while` loop, it is `heredoc_restore()`.

▼ `heredoc_restore()`

+ ```TODO-lang 2990 static void 2991 heredoc_restore(here) diff --git a/security.md b/security.md index 137241c..b0aa5b8 100644 --- a/security.md +++ b/security.md @@ -85,7 +85,7 @@ program, then it should be made into a different thread and have its security level raised. I haven't yet explained how to create a thread, but I will show an example here: -```TODO-lang +```ruby # Raise the security level in a different thread p($SAFE) # 0 is the default Thread.fork { # Start a different thread @@ -146,7 +146,7 @@ The taint mark is, to be concrete, the `FL_TAINT` flag, which is set to Here is its usage. -```TODO-lang +```c OBJ_TAINT(obj) /* set FL_TAINT to obj */ OBJ_TAINTED(obj) /* check if FL_TAINT is set to obj */ OBJ_INFECT(dest, src) /* infect FL_TAINT from src to dest */ @@ -158,7 +158,8 @@ let's briefly look over only `OBJ_INFECT()`.

▼ `OBJ_INFECT`

-```TODO-lang + +```c 441 #define OBJ_INFECT(x,s) do { \ if (FL_ABLE(x) && FL_ABLE(s)) \ RBASIC(x)->flags |= RBASIC(s)->flags & FL_TAINT; \ @@ -179,7 +180,8 @@ it would propagate the flag.

▼ `ruby_safe_level`

-```TODO-lang + +```c 124 int ruby_safe_level = 0; 7401 static void @@ -224,7 +226,8 @@ you can ignore the interface and modify the security level.

▼ `rb_secure()`

-```TODO-lang + +```c 136 void 137 rb_secure(level) 138 int level; diff --git a/spec.md b/spec.md index 31a731b..fdc01b4 100644 --- a/spec.md +++ b/spec.md @@ -40,7 +40,7 @@ other Ruby's literals. #### Single Quoted Strings -```TODO-lang +```ruby 'string' # 「string」 '\\begin{document}' # 「\begin{document}」 '\n' # 「\n」backslash and an n, not a newline @@ -60,7 +60,7 @@ And Ruby's strings aren't divided by newline characters. If we write a string over several lines the newlines are contained in the string. -```TODO-lang +```ruby 'multi line string' @@ -70,14 +70,14 @@ And if the `-K` option is given to the `ruby` command, multibyte strings will be accepted. At present the three encodings EUC-JP (`-Ke`), Shift JIS (`-Ks`), and UTF8 (`-Ku`) can be specified. -```TODO-lang +```ruby '「漢字が通る」と「マルチバイト文字が通る」はちょっと違う' # 'There's a little difference between "Kanji are accepted" and "Multibyte characters are accepted".' ``` #### Double Quoted Strings -```TODO-lang +```ruby "string" # 「string」 "\n" # newline "\x0f" # a byte given in hexadecimal form @@ -100,7 +100,7 @@ are no limitations like only one variable or only one method. Getting this far, it is not a mere literal anymore but the entire thing can be considered as an expression to express a string. -```TODO-lang +```ruby "embedded #{lvar} expression" "embedded #{@ivar} expression" "embedded #{1 + 1} expression" @@ -110,7 +110,7 @@ the entire thing can be considered as an expression to express a string. #### Strings with `%` -```TODO-lang +```ruby %q(string) # same as 'string' %Q(string) # same as "string" %(string) # same as %Q(string) or "string" @@ -122,7 +122,7 @@ changed by using `%`. In the following example, the same string is written as a `"`-string and `%`-string. -```TODO-lang +```ruby "
" %Q() ``` @@ -136,7 +136,7 @@ Here we have used parentheses as delimiters, but something else is fine, too. Like brackets or braces or `#`. Almost every symbol is fine, even `%`. -```TODO-lang +```ruby %q#this is string# %q[this is string] %q%this is string% @@ -151,7 +151,7 @@ When using here document, the lines between the line which contains the starting `<▼ the structure to manage threads

-```TODO-lang +```c 864 typedef struct thread * rb_thread_t; 865 static rb_thread_t curr_thread = 0; 866 static rb_thread_t main_thread; @@ -234,7 +234,7 @@ The spaces for both purposes are respectively prepared in `rb_thread_t`.

▼ `struct thread` (partial)

-```TODO-lang +```c 7301 struct thread { 7302 struct thread *next, *prev; 7303 jmp_buf context; @@ -295,7 +295,7 @@ The place to store the stack properly exists in `struct thread`.

▼ `struct thread` (partial)

-```TODO-lang +```c 7310 int stk_len; /* the stack length */ 7311 int stk_max; /* the size of memory allocated for stk_ptr */ 7312 VALUE*stk_ptr; /* the copy of the stack */ @@ -346,7 +346,7 @@ Below is the interface of `getc`.

▼ `rb_getc()`

-```TODO-lang +```c 1185 int 1186 rb_getc(f) 1187 FILE *f; @@ -382,7 +382,7 @@ What is it? Let's see the inside of `rb_thread_wait_fd()`.

▼ `rb_thread_wait_fd()`

-```TODO-lang +```c 8047 void 8048 rb_thread_wait_fd(fd) 8049 int fd; @@ -435,7 +435,7 @@ Then by scanning, I found it in the function named `rb_thread_join()`.

▼ `rb_thread_join()` (partial)

-```TODO-lang +```c 8227 static int 8228 rb_thread_join(th, limit) 8229 rb_thread_t th; @@ -472,7 +472,7 @@ This is the substance of (Ruby's) `sleep` and such.

▼ `rb_thread_wait_for` (simplified)

-```TODO-lang +```c 8080 void 8081 rb_thread_wait_for(time) 8082 struct timeval time; @@ -527,7 +527,7 @@ It is here.

▼ `catch_timer()`

-```TODO-lang +```c 8574 static void 8575 catch_timer(sig) 8576 int sig; @@ -555,7 +555,7 @@ then it was used around here:

▼ `rb_thread_start_0()` (partial)

-```TODO-lang +```c 8620 static VALUE 8621 rb_thread_start_0(fn, arg, th_arg) 8622 VALUE (*fn)(); @@ -607,7 +607,7 @@ Then, I'd like you to see the code of `catch_timer()` again. -```TODO-lang +```c if (rb_trap_immediate) { rb_thread_schedule(); } @@ -630,7 +630,7 @@ This variable is used in the following place.

▼ `CHECK_INTS` − `HAVE_SETITIMER`

-```TODO-lang +```c 73 #if defined(HAVE_SETITIMER) && !defined(__BOW__) 74 EXTERN int rb_thread_pending; 75 # define CHECK_INTS do {\ @@ -671,7 +671,7 @@ It is the definition of the `#else` side.

▼ `CHECK_INTS` − `not HAVE_SETITIMER`

-```TODO-lang +```c 84 EXTERN int rb_thread_tick; 85 #define THREAD_TICK 500 86 #define CHECK_INTS do {\ @@ -718,7 +718,7 @@ Let's exhaustively divide it into portions.

▼ `rb_thread_schedule()` (outline)

-```TODO-lang +```c 7819 void 7820 rb_thread_schedule() 7821 { @@ -787,7 +787,7 @@ Its prototype is this: -```TODO-lang +```c int select(int max, fd_set *readset, fd_set *writeset, fd_set *exceptset, struct timeval *timeout); @@ -809,7 +809,7 @@ I'll talk about `fd_set` in detail.

▼ `fd_set` manipulation

-```TODO-lang +```c fd_set set; FD_ZERO(&set) /* initialize */ @@ -831,9 +831,9 @@ Figure 2: fd_set I'll show a simple usage example of `select`. -

▼ a usage exmple of `select`

+

▼ a usage example of `select`

-```TODO-lang +```c #include #include #include @@ -880,7 +880,7 @@ I'll show the content in shortened form.

▼ `rb_thread_schedule()` − preparations for `select`

-```TODO-lang +```c 7848 again: /* initialize the variables relating to select */ 7849 max = -1; @@ -921,7 +921,7 @@ These two are defined as follows:

▼ `FOREACH_THREAD_FROM`

-```TODO-lang +```c 7360 #define FOREACH_THREAD_FROM(f,x) x = f; do { x = x->next; 7361 #define END_FOREACH_FROM(f,x) } while (x != f) @@ -933,7 +933,7 @@ Let's extract them for better understandability. -```TODO-lang +```c th = curr; do { th = th->next; @@ -970,7 +970,7 @@ As for its code, let's see it just in case.

▼ `rb_thread_schedule()` − `select` preparation − `join` wait

-```TODO-lang +```c 7861 if (th->wait_for & WAIT_JOIN) { 7862 if (rb_thread_dead(th->join)) { 7863 th->status = THREAD_RUNNABLE; @@ -1003,7 +1003,7 @@ and let it only check if I/O was completed.

▼ `rb_thread_schedule()` − `select`

-```TODO-lang +```c 7904 if (need_select) { 7905 /* convert delay into timeval */ 7906 /* if theres immediately invocable threads, do only I/O checks */ @@ -1085,7 +1085,7 @@ Since all what was invocable and all what had finished waiting and so on became

▼ `rb_thread_schedule()` − decide the next thread

-```TODO-lang +```c 7996 FOREACH_THREAD_FROM(curr, th) { 7997 if (th->status == THREAD_TO_KILL) { /*(A)*/ 7998 next = th; @@ -1164,7 +1164,7 @@ I'll go with a significantly simplified version.

▼ `rb_thread_schedule()` (context switch)

-```TODO-lang +```c if (THREAD_SAVE_CONTEXT(curr)) { return; } @@ -1178,7 +1178,7 @@ we need to extract the content at several places in order to understand.

▼ `THREAD_SAVE_CONTEXT()`

-```TODO-lang +```c 7619 #define THREAD_SAVE_CONTEXT(th) \ 7620 (rb_thread_save_context(th),thread_switch(setjmp((th)->context))) @@ -1211,7 +1211,7 @@ If I merge the three then extract it, here is the result: -```TODO-lang +```c rb_thread_save_context(curr); switch (setjmp(curr->context)) { case 0: @@ -1236,16 +1236,13 @@ Since it does `longjmp()` in `rb_thread_restore_context()`, we can expect the correspondence between `setjmp()` and `longjmp()`. And if we will imagine the meaning also from the function names, - - -```TODO-lang +``` save the context of the current thread setjmp restore the context of the next thread longjmp ``` - The rough main flow would probably look like this. However what we have to be careful about here is, this pair of `setjmp()` and `longjmp()` is not completed in this thread. @@ -1276,7 +1273,7 @@ Now, we'll start with `rb_thread_save_context()`, which saves a context.

▼ `rb_thread_save_context()` (simplified)

-```TODO-lang +```c 7539 static void 7540 rb_thread_save_context(th) 7541 rb_thread_t th; @@ -1322,7 +1319,7 @@ possible. (Figure 4)
(twodirection)
-Fig.4: a stack extending above and a stack extending below +Fig.4: a stack extending above and a stack extending below
@@ -1350,7 +1347,7 @@ which is the function to restore a thread.

▼ `rb_thread_restore_context()`

-```TODO-lang +```c 7635 static void 7636 rb_thread_restore_context(th, exit) 7637 rb_thread_t th; @@ -1422,7 +1419,7 @@ This is done by the `stack_extend()` in the first half.

▼ `stack_extend()`

-```TODO-lang +```c 7624 static void 7625 stack_extend(th, exit) 7626 rb_thread_t th; diff --git a/variable.md b/variable.md index b2c118e..c6c2b80 100644 --- a/variable.md +++ b/variable.md @@ -116,6 +116,7 @@ parameter and the one with the shorter one takes `char*`. Because the one taking an `ID` seems closer to the internals, we'll look at it. ▼ `rb_cvar_get()` + ```c 1508 VALUE 1509 rb_cvar_get(klass, id) @@ -192,6 +193,7 @@ Could it mean that the constants are also... the constant `id` in the class `klass` to the value `val`. ▼ `rb_const_set()` + ```c 1377 void 1378 rb_const_set(klass, id, val) @@ -208,6 +210,7 @@ the constant `id` in the class `klass` to the value `val`. `mod_av_set()` does all the hard work: ▼ `mod_av_set()` + ```c 1352 static void 1353 mod_av_set(klass, id, val, isconst) diff --git a/yacc.md b/yacc.md index a649383..0970677 100644 --- a/yacc.md +++ b/yacc.md @@ -134,7 +134,7 @@ It's good to have a look at the file once. The grammar file's content has the following form: ▼ General form of the grammar file -```TODO-lang +``` %{ Header %} From 17ef055234e56ef7407ce151bbccb91a2a703d10 Mon Sep 17 00:00:00 2001 From: Ulysse Buonomo Date: Sat, 17 Apr 2021 20:58:33 +0200 Subject: [PATCH 06/14] preface --- preface.md | 166 ++++++++++++++++++++++------------------------------- 1 file changed, 70 insertions(+), 96 deletions(-) diff --git a/preface.md b/preface.md index c7e4142..5e98bde 100644 --- a/preface.md +++ b/preface.md @@ -1,25 +1,22 @@ --- layout: default +title: Preface --- Preface ======= - This book explores several themes with the following goals in mind: - -* To have knowledge of the structure of @ruby@ +* To have knowledge of the structure of `ruby` * To gain knowledge about language processing systems in general * To acquire skills in reading source code - Ruby is an object-oriented language developed by Yukihiro Matsumoto. The -official implementation of the Ruby language is called @ruby@. 
It is actively +official implementation of the Ruby language is called `ruby`. It is actively developed and maintained by the open source community. Our first goal is to -understand the inner-workings of the @ruby@ implementation. This book is going -to investigate @ruby@ as a whole. - +understand the inner-workings of the `ruby` implementation. This book is going +to investigate `ruby` as a whole. Secondly, by knowing about the implementation of Ruby, we will be able to know about other language processing systems. I tried to cover all topics necessary @@ -31,13 +28,11 @@ implementation are adequately explained. And a brief summary of Ruby language itself is also included so that readers who don't know about Ruby can read this book. - The main themes of this book are the first and the second point above. Though, what I want to emphasize the most is the third one: To acquire skill in reading source code. I dare to say it's a "hidden" theme. I will explain why I thought it is necessary. - It is often said "To be a skilled programmer, you should read source code written by others." This is certainly true. But I haven't found a book that explains how you can actually do it. There are many books that explain OS @@ -45,32 +40,28 @@ kernels and the interior of language processing systems by showing the concrete structure or "the answer," but they don't explain the way to reach that answer. It's clearly one-sided. - Can you, perhaps, naturally read code just because you know how to write a program? Is it true that reading codes is so easy that all people in this world can read code written by others with no sweat? I don't think so. Reading programs is certainly as difficult as writing programs. - -Therefore, this book does not simply explain @ruby@ as something already known, +Therefore, this book does not simply explain `ruby` as something already known, rather demonstrate the analyzing process as graphic as possible. 
Though I think I'm a reasonably seasoned Ruby programmer, -I did not fully understand the inner structure of @ruby@ at the time when I +I did not fully understand the inner structure of `ruby` at the time when I started to write this book. -In other words, regarding the content of @ruby@, +In other words, regarding the content of `ruby`, I started from the position as close as possible to readers. This book is the summary of both the analyzing process started from that point and its result. - -I asked Yukihiro Matsumoto, the author of @ruby@, for supervision. But I +I asked Yukihiro Matsumoto, the author of `ruby`, for supervision. But I thought the spirit of this book would be lost if each analysis was monitored by the author of the language himself. Therefore I limited his review to the final stage of writing. In this way, without loosing the sense of actually reading the source codes, I think I could also assure the correctness of the contents. - To be honest, this book is not easy. In the very least, it is limited in its simplicity by the inherent complexity of its aim. However, this complexity may be what makes the book interesting to you. Do you find it interesting to be @@ -90,12 +81,10 @@ knowledge of the Ruby language is absolutely necessary to understand certain explanations of its structure, supplementary explanations of the language are inserted here and there. - Knowledge about the C language is required, to some extent. I assume you can -allocate some structs with @malloc()@ at runtime to create a list or a stack +allocate some structs with `malloc()` at runtime to create a list or a stack and you have experience of using function pointers at least a few times. - Also, since the basics of object-oriented programming will not be explained so seriously, without having any experience of using at least one of object-oriented languages, you will probably have a difficult time. 
@@ -106,70 +95,65 @@ Structure of this book This book has four main parts: - -| Part 1: Objects | -| Part 2: Syntactic analysis | -| Part 3: Evaluation | -| Part 4: Peripheral around the evaluator | - +* Part 1: Objects +* Part 2: Syntactic analysis +* Part 3: Evaluation +* Part 4: Peripheral around the evaluator Supplementary chapters are included at the beginning of each part when necessary. These provide a basic introduction for those who are not familiar with Ruby and the general mechanism of a language processing system. - Now, we are going through the overview of the four main parts. The symbol in parentheses after the explanation indicates the difficulty gauge. They are ==(C)==, (B), (A) in order of easy to hard, (S) being the highest. #### Part 1: Object -| Chapter1 | Focuses the basics of Ruby to get ready to accomplish Part 1. ==(C)== | -| Chapter2 | Gives concrete inner structure of Ruby objects. ==(C)== | -| Chapter3 | States about hash table. ==(C)== | -| Chapter4 | Writes about Ruby class system. You may read through this chapter quickly at first, because it tells plenty of abstract stories. (A) | -| Chapter5 | Shows the garbage collector which is responsible for generating and releasing objects. The first story in low-level series. (B) | -| Chapter6 | Describes the implementation of global variables, class variables, and constants. ==(C)== | -| Chapter7 | Outline of the security features of Ruby. ==(C)== | +* Chapter1 — Focuses the basics of Ruby to get ready to accomplish Part 1. ==(C)== +* Chapter2 — Gives concrete inner structure of Ruby objects. ==(C)== +* Chapter3 — States about hash table. ==(C)== +* Chapter4 — Writes about Ruby class system. You may read through this chapter quickly at first, because it tells plenty of abstract stories. (A) +* Chapter5 — Shows the garbage collector which is responsible for generating and releasing objects. The first story in low-level series. 
(B) +* Chapter6 — Describes the implementation of global variables, class variables, and constants. ==(C)== +* Chapter7 — Outline of the security features of Ruby. ==(C)== #### Part 2: Syntactic analysis -| Chapter8 | Talks about almost complete specification of the Ruby language, in order to prepare for Part 2 and Part 3. ==(C)== | -| Chapter9 | Introduction to @yacc@ required to read the syntax file at least. (B) | -| Chapter10 | Look through the rules and physical structure of the parser. (A) | -| Chapter11 | Explore around the peripherals of @lex_state@, which is the most difficult part of the parser. The most difficult part of this book. (S) | -| Chapter12 | Finalization of Part 2 and connection to Part 3. ==(C)== | +* Chapter8 — Talks about almost complete specification of the Ruby language, in order to prepare for Part 2 and Part 3. ==(C)== +* Chapter9 — Introduction to `yacc` required to read the syntax file at least. (B) +* Chapter10 — Look through the rules and physical structure of the parser. (A) +* Chapter11 — Explore around the peripherals of `lex_state`, which is the most difficult part of the parser. The most difficult part of this book. (S) +* Chapter12 — Finalization of Part 2 and connection to Part 3. ==(C)== #### Part 3: Evaluator -| Chapter13 | Describe the basic mechanism of the evaluator. ==(C)== | -| Chapter14 | Reads the evaluation stack that creates the main context of Ruby. (A) | -| Chapter15 | Talks about search and initialization of methods. (B) | -| Chapter16 | Defies the implementation of the iterator, the most characteristic feature of Ruby. (A) | -| Chapter17 | Describe the implementation of the eval methods. (B) | +* Chapter13 — Describe the basic mechanism of the evaluator. ==(C)== +* Chapter14 — Reads the evaluation stack that creates the main context of Ruby. (A) +* Chapter15 — Talks about search and initialization of methods. (B) +* Chapter16 — Defies the implementation of the iterator, the most characteristic feature of Ruby. 
(A) +* Chapter17 — Describe the implementation of the eval methods. (B) #### Part 4: Peripheral around the evaluator -| Chapter18 | Run-time loading of libraries in C and Ruby. (B) | -| Chapter19 | Describes the implementation of thread at the end of the core part. (A) | +* Chapter18 — Run-time loading of libraries in C and Ruby. (B) +* Chapter19 — Describes the implementation of thread at the end of the core part. (A) Environment =========== -This book describes on @ruby@ 1.7.3 2002-09-12 version. It's attached on the -CD-ROM. Choose any one of @ruby-rhg.tar.gz@, @ruby-rhg.lzh@, or @ruby-rhg.zip@ +This book describes on `ruby` 1.7.3 2002-09-12 version. It's attached on the +CD-ROM. Choose any one of `ruby-rhg.tar.gz`, `ruby-rhg.lzh`, or `ruby-rhg.zip` according to your convenience. Content is the same for all. Alternatively you can obtain from the support site (footnote{`http://i.loveruby.net/ja/rhg/`}) of this book. - For the publication of this book, the following build environment was prepared for confirmation of compiling and testing the basic operation. The details of -this build test are given in @doc/buildtest.html@ in the attached CD-ROM. +this build test are given in `doc/buildtest.html` in the attached CD-ROM. However, it doesn't necessarily assume the probability of the execution even under the same environment listed in the table. The author doesn't guarantee -in any form the execution of @ruby@. - +in any form the execution of `ruby`. * BeOS 5 Personal Edition/i386 * Debian GNU/Linux potato/i386 @@ -200,32 +184,29 @@ in any form the execution of @ruby@. * Windows 2000 (Borland C++ Compiler 5.5, Visual C++ 6, Visual C++.NET) * Windows XP (Visual C++.NET, MinGW+Cygwin) - These numerous tests aren't of a lone effort by the author. Those test build couldn't be achieved without magnificent cooperations by the people listed below. I'd like to extend warmest thanks from my heart. 
- -| Tietew | -| kjana | -| nyasu | -| sakazuki | -| Masahiro Sato | -| Kenichi Tamura | -| Morikyu | -| Yuya Kato | -| Takehiro Kubo | -| Kentaro Goto | -| Tomoyuki Shimomura | -| Masaki Sukeda | -| Koji Arai | -| Kazuhiro Nishiyama | -| Shinya Kawaji | -| Tetsuya Watanabe | -| Naokuni Fujimoto | - +* Tietew +* kjana +* nyasu +* sakazuki +* Masahiro Sato +* Kenichi Tamura +* Morikyu +* Yuya Kato +* Takehiro Kubo +* Kentaro Goto +* Tomoyuki Shimomura +* Masaki Sukeda +* Koji Arai +* Kazuhiro Nishiyama +* Shinya Kawaji +* Tetsuya Watanabe +* Naokuni Fujimoto However, the author owes the responsibility for this test. Please refrain from attempting to contact these people directly. If there's any flaw in execution, @@ -246,49 +227,44 @@ Acknowledgment First of all, I would like to thank Mr. Yukihiro Matsumoto. He is the author of Ruby, and he made it in public as an open source software. Not only he -willingly approved me to publish a book about analyzing @ruby@, but also he +willingly approved me to publish a book about analyzing `ruby`, but also he agreed to supervise the content of it. In addition, he helped my stay in Florida with simultaneous translation. There are plenty of things beyond enumeration I have to say thanks to him. Instead of writing all the things, I give this book to him. - Next, I would like to thank arton, who proposed me to publish this book. The words of arton always moves me. One of the things I'm currently struggled due to his words is that I have no reason I don't get a .NET machine. - Koji Arai, the 'captain' of documentation in the Ruby society, conducted a scrutiny review as if he became the official editor of this book while I was not told so. I thank all his review. - Also I'd like to mention those who gave me comments, pointed out mistakes and submitted proposals about the construction of the book throughout all my work. 
-Tietew, -Yuya, -Kawaji, -Gotoken, -Tamura, -Funaba, -Morikyu, -Ishizuka, -Shimomura, -Kubo, -Sukeda, -Nishiyama, -Fujimoto, -Yanagawa, -(I'm sorry if there's any people missing), +Tietew,
+Yuya,
+Kawaji,
+Gotoken,
+Tamura,
+Funaba,
+Morikyu,
+Ishizuka,
+Shimomura,
+Kubo,
+Sukeda,
+Nishiyama,
+Fujimoto,
+Yanagawa,
+(I'm sorry if there's any people missing),
I thank all those people contributed. - As a final note, I thank Otsuka , Haruta, and Kanemitsu who you for arranging everything despite my broke deadline as much as four times, and that the manuscript exceeded 200 pages than originally planned. - I cannot expand the full list here to mention the name of all people contributed to this book, but I say that I couldn't successfully publish this book without such assistance. Let me take this place to express my @@ -296,11 +272,9 @@ appreciation. Thank you very much. p(right). Minero Aoki - If you want to send remarks, suggestions and reports of typographcal errors, please address to [Minero Aoki <aamine@loveruby.net>](mailto:aamine@loveruby.net). - "Rubyソースコード完全解説" can be reserved/ordered at ImpressDirect. [(Jump to the introduction page)](http://direct.ips.co.jp/directsys/go_x_TempChoice.cfm?sh_id=EE0040&spm_id=1&GM_ID=1721) From 76e0ba5f32d1ea2e38fa7e4910113ecfa4e1b810 Mon Sep 17 00:00:00 2001 From: Ulysse Buonomo Date: Sat, 17 Apr 2021 21:07:39 +0200 Subject: [PATCH 07/14] intro --- fin.md | 4 +- intro.md | 126 ++-------------------------------------------------- security.md | 25 +---------- 3 files changed, 5 insertions(+), 150 deletions(-) diff --git a/fin.md b/fin.md index 860272e..9b22f68 100644 --- a/fin.md +++ b/fin.md @@ -203,9 +203,7 @@ disappointments. Therefore, for now, let's just expect slightly. Firstly, the language to use. Definitely it will be C. Mr. Matsumoto said to `ruby-talk`, which is the English mailing list for Ruby, -
-I hate C++. -
+> I hate C++. So, C++ is most unlikely. Even if all the parts will be recreated, it is reasonable that the object system will remain almost the same, diff --git a/intro.md b/intro.md index 14229d5..d5511a2 100644 --- a/intro.md +++ b/intro.md @@ -3,7 +3,6 @@ layout: default title: Introduction --- - Introduction ------------ @@ -14,7 +13,6 @@ Some of the readers may have already been familiar with Ruby, but (I hope) there are also many readers who have not. First let's go though a rough summary of the characteristics of Ruby for such people. - Hereafter capital "Ruby" refers to Ruby as a language specification, and lowercase "ruby" refers to `ruby` command as an implementation. @@ -26,32 +24,26 @@ The specification is merely shown as an implementation as `ruby`, and its varying continuously. For good or bad, it's free. - Furthermore `ruby` itself is a free software. It's probably necessary to mention at least the two points here: The source code is open in public and distributed free of charge. Thanks to such condition, an attempt like this book can be approved. - If you'd like to know the exact licence, you can read `README` and `LEGAL`. For the time being, I'd like you to remember that you can do at least the following things: - * You can redistribute source code of `ruby` * You can modify source code of `ruby` * You can redistribute a copy of source code with your modification - There is no need for special permission and payment in all these cases. - By the way, the purpose of this book is to read the original `ruby`, thus the target source is the one not modified unless it is particularly specified. However, white spaces, new lines and comments were added or removed without asking. - #### It's conservative Ruby is a very conservative language. It is equipped with only carefully chosen @@ -61,12 +53,10 @@ So it has a tendency to appeal to programmers who put importance on practical functionalities. 
The dyed-in-the-wool hackers like Scheme and Haskell lovers don't seem to find appeal in ruby, at least in a short glance. - The library is conservative in the same way. Clear and unabbreviated names are given for new functions, while names that appears in C and Perl libraries have been taken from them. For example, `printf`, `getpwent`, `sub`, and `tr`. - It is also conservative in implementation. Assembler is not its option for seeking speed. Portability is always considered a higher priority when it conflicts with speed. @@ -76,7 +66,6 @@ conflicts with speed. Ruby is an object-oriented language. It is absolutely impossible to exclude it from the features of Ruby. - I will not give a page to this book about what an object-oriented language is. To tell about an object-oriented feature about Ruby, the expression of the code that just going to be explained is the exact sample. @@ -88,7 +77,6 @@ from the features of Ruby. To gain agreement of everyone, an introduction of Ruby must include "object-oriented" and "script language". - However, what is a "script language" for example? I couldn't figure out the definition successfully. For example, John K. Ousterhout, the author of Tcl/Tk, gives a definition as "executable language using `#!` on UNIX". There are other @@ -96,7 +84,6 @@ definitions depending on the view points, such as one that can express a useful program with only one line, or that can execute the code by passing a program file from the command line, etc. - However, I dare to use another definition, because I don't find much interest in "what" a script language. I have the only one measure to decide to call it a script language, @@ -104,10 +91,8 @@ that is, whether no one would complain about calling it a script language. To fulfill this definition, I would define the meaning of "script language" as follows. - A language that its author calls it a "script language". - I'm sure this definition will have no failure. And Ruby fulfills this point. 
Therefore I call Ruby a "script language". @@ -119,7 +104,6 @@ It must be because in some points being an interpreter is better than being a compiler ... at least for ruby, it must be better. Well, what is good about being an interpreter? - As a preparation step to investigating into it, let's start by thinking about the difference between an interpreter and a compiler. If the matter is to attempt a theoretical comparison in the process how a program is executed, @@ -129,7 +113,6 @@ language, it may be possible to say it works as an interpreter. Then where is the place that actually makes a difference? It is a more practical place, in the process of development. - I know somebody, as soon as hearing "in the process of development", would claim using a stereotypical phrase, that an interpreter reduces effort of compilation that makes the development procedure easier. But I don't think it's @@ -139,7 +122,6 @@ F5. A claim about a long time for compilation is derived from the size of the project or optimization of the codes. Compilation itself doesn't owe a negative side. - Well, why people perceive an interpreter and compiler so much different like this? I think that it is because the language developers so far have chosen either implementation based on the trait of each language. In other words, @@ -150,13 +132,11 @@ development and accuracy is required, it would be a compiler. That may be because of the speed, as well as the ease of creating a language. - Therefore, I think "it's handy because it's an interpreter" is an outsized myth. Being an interpreter doesn't necessarily contribute the readiness in usage; seeking readiness in usage naturally makes your path toward building an interpreter language. - Anyway, `ruby` is an interpreter; it has an important fact about where this book is facing, so I emphasize it here again. Though I don't know about "it's handy because it is an interpreter", @@ -171,7 +151,6 @@ It has only a few parts written in assembler. 
Therefore porting to a new platform is comparatively easy. Namely, it works on the following platforms currently. - * Linux * Win32 (Windows 95, 98, Me, NT, 2000, XP) * Cygwin @@ -191,17 +170,14 @@ on the following platforms currently. * OS/2 (emx) * Psion - I heard that the main machine of the author Matsumoto is Linux. Thus when using Linux, you will not fail to compile any time. - Furthermore, you can expect a stable functionality on a (typical) Unix environment. Considering the release cycle of packages, the primary option for the environment to hit around `ruby` should fall on a branch of PC UNIX, currently. - On the other hand, the Win32 environment tends to cause problems definitely. The large gaps in the targeting OS model tend to cause problems around the machine stack and the linker. Yet, recently Windows hackers have contributed to @@ -209,30 +185,25 @@ make better support. I use a native ruby on Windows 2000 and Me. Once it gets successfully run, it doesn't seem to show special concerns like frequent crashing. The main problems on Windows may be the gaps in the specifications. - Another type of OS that many people may be interested in should probably be Mac OS (prior to v9) and handheld OS like Palm. - Around `ruby 1.2` and before, it supported legacy Mac OS, but the development seems to be in suspension. Even a compiling can't get through. The biggest cause is that the compiler environment of legacy Mac OS and the decrease of developers. Talking about Mac OS X, there's no worries because the body is UNIX. - There seem to be discussions the portability to Palm several branches, but I have never heard of a successful project. I guess the difficulty lies in the necessity of settling down the specification-level standards such as `stdio` on the Palm platform, rather than the processes of actual implementation. Well I -saw a porting to Psion has been done. ([ruby-list:36028]). - +saw a porting to Psion has been done. (\[ruby-list:36028\]). 
How about hot stories about VM seen in Java and .NET? Because I'd like to talk about them combining together with the implementation, this topic will be in the final chapter. - #### Automatic memory control Functionally it's called GC, or Garbage Collection. Saying it in C-language, @@ -241,7 +212,6 @@ detected by the system automatically, and will be released. It's so convenient that once you get used to GC you won't be willing to do such manual memory control again. - The topics about GC have been common because of its popularity in recent languages with GC as a standard set, and it is fun that its algorithms can still be improved further. @@ -254,7 +224,6 @@ advantages of an object-oriented language. Of course a language with variable type has a way to deal with polymorphism. What I mean here is a typeless variables have better conformance. - The level of "better conformance" in this case refers to synonyms like "handy". It's sometimes corresponds to crucial importance, sometimes it doesn't matter practically. Yet, this is certainly an appealing point if a language seeks for @@ -280,7 +249,6 @@ result = cond ? process(val) : 0; This rewrite is possible because the conditional operator (`a?b:c`) is defined as an expression. - On the other hand, in Ruby, you can write as follows because `if` is an expression. ```ruby @@ -336,7 +304,6 @@ If you try to compile it with a C++ compiler, it would warn prototype mismatch and could not compile. ... These kind of stories are often reported to the mailing list. - #### Extension library We can write a Ruby library in C and load it at runtime without recompiling Ruby. @@ -408,11 +375,7 @@ At first, I mention the principle. #### Decide a goal - -
-An important key to reading the source code is to set a concrete goal. -
- +> An important key to reading the source code is to set a concrete goal. This is a word by the author of Ruby, Matsumoto. Indeed, his word is very convincing for me. When the motivation is a spontaneous idea "Maybe I should read a kernel, at least...", @@ -553,7 +516,6 @@ Additionally, if a file contains some modules in it, for each module the functions to compose it should be grouped together, so you can find out the module structure from the order of the functions. - #### Investigating abbreviations As you encounter ambiguous abbreviations, make a list of them and investigate @@ -566,7 +528,6 @@ the initial letters or dropping the vowels. Especially, popular abbreviations in the fields of the target program are used unconditionally, thus you should be familiar with them at an early stage. - #### Understanding data structure If you find both data and code, you should first investigate the data structure. @@ -606,7 +567,6 @@ seeing the actual example, thus it will be explained in the main part. Additionally, when you don't like its coding style, you can convert it by using the tool like `indent`. - #### Experimenting by modifying it as you like It's a mystery of human body, @@ -630,7 +590,6 @@ way. Otherwise, you would be wondering for hours because of a simple your own mistake. And since the purpose of rewriting is getting used to and not rewriting itself, please be careful not to be enthusiastic very much. - ### Reading the history A program often comes with a document which is about the history of changes. @@ -649,7 +608,6 @@ developers, you should get the archives so that you can search over them any time because often there's the information about the exact reason of a certain change. Of course, if you can search online, it's also sufficient. - ### The tools for static analysis Since various tools are available for various purposes, @@ -684,10 +642,6 @@ features. 
(It's more preferable if you can jump to the location) * function cross-reference - - - - Build ===== @@ -714,13 +668,11 @@ Additionally, it is likely that 1.8, which is the next stable version, will be out in the near future. And the last one is, investigating the edge would make our mood more pleasant. - ### Getting the source code The archive of the target version is included in the attached CD-ROM. In the top directory of the CD-ROM, - ``` ruby-rhg.tar.gz ruby-rhg.zip @@ -738,8 +690,6 @@ For example, the archive of `tar.gz` can be extracted as follows. ~/src % umount /mnt/cdrom ``` - - ### Compiling Just by looking at the source code, you can "read" it. @@ -756,7 +706,6 @@ so it will be described in the next section altogether. However, Cygwin is on Windows but almost Unix, thus I'd like you to read this section for it. - #### Building on a Unix-like OS When it is a Unix-like OS, because generally it is equipped with a C @@ -770,7 +719,6 @@ Let us suppose `~/src/ruby` is the place where the source code is extracted. ~/src/ruby # make install ``` - Below, I'll describe several points to be careful about. On some platforms like Cygwin, UX/4800, @@ -786,12 +734,8 @@ as shared libraries (`libruby.so`). The detailed tutorial about building is included in `doc/build.html` of the attached CD-ROM, I'd like you to try as reading it. - - - #### Building on Windows - If the thing is to build on windows, it becomes way complicated. The source of the problem is, there are multiple building environments. @@ -800,7 +744,6 @@ The source of the problem is, there are multiple building environments. * Cygwin * Borland C++ Compiler - First, the condition of the Cygwin environment is closer to UNIX than Windows, you can follow the building procedures for Unix-like OS. @@ -825,7 +768,6 @@ there's more or less anxiety, but there was not any particular problem on the build test done before the publication of this book. 
- Then, among the above four environments, which one should we choose? First, basically the Visual C++ version is the most unlikely to cause a problem, thus I recommend it. @@ -840,8 +782,6 @@ For more detailed explanations and how to build with Borland C++ Compiler, they are included in `doc/build.html` of the attached CD-ROM, thus I'd like you to check it when it is necessary. - - #### Visual C++ It is said Visual C++, but usually IDE is not used, we'll build from DOS prompt. @@ -855,7 +795,6 @@ C:\> cd "\Program Files\Microsoft Visual Studio .NET\Vc7\bin" C:\Program Files\Microsoft Visual Studio .NET\Vc7\bin> vcvars32 ``` - This is the case of Visual C++ .NET. If it is version 6, it can be found in the following place. @@ -863,7 +802,6 @@ If it is version 6, it can be found in the following place. C:\Program Files\Microsoft Visual Studio\VC98\bin\ ``` - After executing `vcvars32`, all you have to do is to move to the `win32\` folder of the source tree of `ruby` and build. Below, let us suppose the source tree is in `C:\src`. @@ -876,13 +814,11 @@ C:\src\ruby\win32> nmake C:\src\ruby\win32> nmake DESTDIR="C:\Program Files\ruby" install ``` - Then, `ruby` command would be installed in `C:\Program Files\ruby\bin\`, and Ruby libraries would be in `C:\Program Files\ruby\lib\`. Because `ruby` does not use registries and such at all, you can uninstall it by deleting `C:\Program Files\ruby` and below. - #### MinGW As described before, MinGW is only an environment to compile, @@ -916,9 +852,6 @@ compiled. Because really complicated things occur around here, the explanation would be fairly long, so I'll explain it comprehensively in `doc/build.html` of the attached CD-ROM. - - - Building Details ================ @@ -934,7 +867,6 @@ Namely, `configure`, `make` and `make install`. As considering the explanation about `make install` unnecessary, I'll explain the `configure` phase and the `make` phase. - ### `configure` First, `configure`. 
Its content is a shell script, and we detect the system @@ -962,7 +894,6 @@ Makefile.in: CFLAGS = @CFLAGS@ Makefile : CFLAGS = -g -O2 ``` - Alternatively, it writes out the information about, for instance, whether there are certain functions or particular header files, into a header file. Because the output file name can be changed, it is different depending on each @@ -992,14 +923,12 @@ Its content is something like this. : ``` - Each meaning is easy to understand. `HAVE_xxxx_H` probably indicates whether a certain header file exists, `SIZEOF_SHORT` must indicate the size of the `short` type of C. Likewise, `SIZEOF_INT` indicates the byte length of `int`, `HAVE_OFF_T` indicates whether the `offset_t` type is defined or not. - As we can understand from the above things, `configure` does detect the differences but it does not automatically absorb the differences. Bridging the difference is left to each programmer. @@ -1015,10 +944,6 @@ For example, as follows, (ruby.h) ``` - - - - ### `autoconf` `configure` is not a `ruby`-specific tool. @@ -1053,7 +978,6 @@ By the way, `ruby`'s `configure` is, as said before, generated by using can be used. Anyway, it's sufficient if ultimately there are `Makefile` and `config.h` and many others. - ### `make` At the second phase, `make`, what is done? @@ -1082,9 +1006,6 @@ itself. In this case, it cannot create `ruby` before compiling all extension libraries, but the extension libraries cannot be compiled without `ruby`. In order to resolve this dilemma, it uses `miniruby`. - - - `CVS` ===== @@ -1096,7 +1017,6 @@ How `ruby` has been changed, why it has been so, these things are not described there. Then what is the way to see the entire picture including the past. We can do it by using CVS. - ### About CVS CVS is shortly an undo list of editors. @@ -1107,7 +1027,6 @@ time. Generally a program doing such job is called source code management system and CVS is the most famous open-source source code management system in this world. 
- Since `ruby` is also managed with CVS, I'll explain a little about the mechanism and usage of CVS. First, the most important idea of CVS is repository and working-copy. @@ -1115,7 +1034,6 @@ I said CVS is something like an undo list of editor, in order to archive this, the records of every changing history should be saved somewhere. The place to store all of them is "CVS repository". - Directly speaking, repository is what gathers all the past source codes. Of course, this is only a concept, in reality, in order to save spaces, it is stored in the form of @@ -1123,19 +1041,16 @@ one recent appearance and the changing differences (namely, batches). In any ways, it is sufficient if we can obtain the appearance of a particular file of a particular moment any time. - On the other hand, "working copy" is the result of taking files from the repository by choosing a certain point. There's only one repository, but you can have multiple working copies. (Figure 2) -
figure 2: Repository and working copies
figure 2: Repository and working copies
- When you'd like to modify the source code, first take a working copy, edit it by using editor and such, and "return" it. @@ -1150,13 +1065,11 @@ then we can obtain it any time.
figure 3: Checkin and Checkout
- The biggest trait of CVS is we can access it over the networks. It means, if there's only one server which holds the repository, everyone can checkin/checkout over the internet any time. But generally the access to check in is restricted and we can't do it freely. - #### Revision How can we do to obtain a certain version from the repository? @@ -1164,20 +1077,15 @@ One way is to specify with time. By requiring "give me the edge version of that time", it would select it. But in practice, we rarely specify with time. Most commonly, we use something named "revision". - "Revision" and "Version" have the almost same meaning. But usually "version" is attached to the project itself, thus using the word "version" can be confusing. Therefore, the word "revision" is used to indicate a bit smaller unit. - In CVS, the file just stored in the repository is revision 1.1. Checking out it, modifying it, checking in it, then it would be revision 1.2. Next it would be 1.3 then 1.4. - - - #### A simple usage example of CVS Keeping in mind the above things, @@ -1197,7 +1105,6 @@ CVS Password: anonymous % cvs -d :pserver:anonymous@cvs.ruby-lang.org:/src checkout ruby ``` - Any options were not specified, thus the edge version would be automatically checked out. The truly edge version of `ruby` must appear under `ruby/`. @@ -1223,9 +1130,6 @@ The manual coming with `cvs` is fairly friendly. Regarding books which you can read in Japanese, I recommend translated "Open Source Development with CVS" Karl Fogel, Moshe Bar. - - - The composition of `ruby` ========================= @@ -1245,7 +1149,6 @@ Below is the appearance of the top directory immediately after checking out from the CVS repository. What end with a slash are subdirectories. 
- ``` COPYING compar.c gc.c numeric.c sample/ COPYING.ja config.guess hash.c object.c signal.c @@ -1267,7 +1170,6 @@ bignum.c ext/ mkconfig.rb rubysig.h class.c file.c node.h rubytest.rb ``` - Recently the size of a program itself has become larger, and there are many softwares whose subdirectories are divided into pieces, but `ruby` has been consistently used the top directory for a long time. @@ -1283,7 +1185,6 @@ The files at the top level can be categorized into six: * standard Ruby libraries * the others - The source code and the build tool are obviously important. Aside from them, I'll list up what seems useful for us. @@ -1299,16 +1200,12 @@ How to create an extension library is described, but in the course of it, things relating to the implementation of `ruby` itself are also written. - - - ### Dissecting Source Code From now on, I'll further split the source code of `ruby` itself into more tiny pieces. As for the main files, its categorization is described in `README.EXT`, thus I'll follow it. Regarding what is not described, I categorized it by myself. - #### Ruby Language Core | File | Description | @@ -1327,7 +1224,6 @@ thus I'll follow it. Regarding what is not described, I categorized it by myself | `node.h` | the definitions relating to the syntax tree nodes | | `env.h` | the definitions of the structs to express the context of the evaluator | - The parts to compose the core of the `ruby` interpreter. The most of the files which will be explained in this book are contained here. If you consider the number of the files of the entire `ruby`, @@ -1335,7 +1231,6 @@ it is really only a few. But if you think based on the byte size, 50% of the entire amount is occupied by these files. Especially, `eval.c` is 200KB, `parse.y` is 100KB, these files are large. - #### Utility | File | Description | @@ -1349,8 +1244,6 @@ It means utility for `ruby`. However, some of them are so large that you cannot imagine it from the word "utility". 
For instance, `regex.c` is 120 KB. - - #### Implementation of `ruby` command | File | Description | @@ -1361,7 +1254,6 @@ However, some of them are so large that you cannot imagine it from the word | `ruby.c` | the main part of `ruby` command (this is also necessary for `libruby` ) | | `version.c` | the version of `ruby` | - The implementation of `ruby` command, which is of when typing `ruby` on the command line and execute it. This is the part, for instance, to interpret the command line options. @@ -1370,8 +1262,6 @@ there are `mod_ruby` and `vim`. These commands are functioning by linking to the `libruby` library (`.a`/`.so`/`.dll` and so on). - - #### Class Libraries | File | Ruby equivalent | @@ -1404,8 +1294,6 @@ What listed here are basically implemented in the completely same way as the ordinary Ruby extension libraries. It means that these libraries are also examples of how to write an extension library. - - #### Files depending on a particular platform | Folder | Description | @@ -1420,7 +1308,6 @@ examples of how to write an extension library. Each platform-specific code is stored. - #### fallback functions ``` @@ -1430,10 +1317,6 @@ missing/ Files to offset the functions which are missing on each platform. Mainly functions of `libc`. - - - - ### Logical Structure Now, there are the above four groups and the core can be divided further into @@ -1446,7 +1329,6 @@ parser converts a program into the internal format, and evaluator actuates the program. Let me explain them in order. - #### Object Space The first one is object space. This is very easy to understand. It is because @@ -1455,7 +1337,6 @@ thus we can directly show or manipulate them by using functions. Therefore, in this book, the explanation will start with this part. Part 1 is from chapter 2 to chapter 7. - #### Parser The second one is parser. Probably some preliminary explanations are necessary @@ -1478,14 +1359,13 @@ for instance, figure 4 shows how an `if` statement is expressed.
figure 4: an `if` statement and its corresponding syntax tree -
figure 4: an `if` statement and its corresponding syntax tree
+
figure 4: an if statement and its corresponding syntax tree
Parser will be described in Part 2 "Syntactic Analysis". Part 2 is from chapter 10 to chapter 12. Its target file is only `parse.y`. - #### Evaluator Objects are easy to understand because they are tangible. diff --git a/security.md b/security.md index b0aa5b8..6005f30 100644 --- a/security.md +++ b/security.md @@ -115,10 +115,8 @@ is common sense that adding new features can make holes easier to open. Therefore it is prudent to think that `ruby` can probably be dangerous. - ### Implementation - From now on, we'll start to look into its implementation. In order to wholly grasp the security system of `ruby`, we have to look at "where is being checked" rather than its mechanism. @@ -128,35 +126,28 @@ Therefore, in this chapter, I'll only describe about the mechanism used for security checks. The APIs to check are mainly these below two: - * `rb_secure(n)` : If more than or equal to level n, it would raise `SecurityError`. * `SafeStringValue()` : If more than or equal to level 1 and a string is tainted, then it would raise an exception. - We won't read `SafeStringValue()` here. - #### Tainted Mark - The taint mark is, to be concrete, the `FL_TAINT` flag, which is set to `basic->flags`, and what is used to infect it is the `OBJ_INFECT()` macro. Here is its usage. - ```c OBJ_TAINT(obj) /* set FL_TAINT to obj */ OBJ_TAINTED(obj) /* check if FL_TAINT is set to obj */ OBJ_INFECT(dest, src) /* infect FL_TAINT from src to dest */ ``` - Since `OBJ_TAINT()` and `OBJ_TAINTED()` can be assumed not important, let's briefly look over only `OBJ_INFECT()`. -

▼ `OBJ_INFECT`

```c @@ -168,16 +159,11 @@ let's briefly look over only `OBJ_INFECT()`. (ruby.h) ``` - `FL_ABLE()` checks if the argument `VALUE` is a pointer or not. If the both objects are pointers (it means each of them has its `flags` member), it would propagate the flag. - - - -#### $SAFE - +#### `$SAFE`

▼ `ruby_safe_level`

@@ -201,30 +187,22 @@ it would propagate the flag. (eval.c) ``` - The substance of `$SAFE` is `ruby_safe_level` in `eval.c`. As I previously wrote, `$SAFE` is local to each thread, It needs to be written in `eval.c` where the implementation of threads is located. In other words, it is in `eval.c` only because of the restrictions of C, but it can essentially be located in another place. - `safe_setter()` is the `setter` of the `$SAFE` global variable. It means, because this function is the only way to access it from Ruby level, the security level cannot be lowered. - However, as you can see, from C level, because `static` is not attached to `ruby_safe_level`, you can ignore the interface and modify the security level. - - - - #### `rb_secure()` -

▼ `rb_secure()`

```c @@ -241,6 +219,5 @@ you can ignore the interface and modify the security level. (eval.c) ``` - If the current safe level is more than or equal to `level`, this would raise `SecurityError`. It's simple. From 89011307f957d5d1cdcbbcf682ccb02e1fb24012 Mon Sep 17 00:00:00 2001 From: Ulysse Buonomo Date: Sun, 18 Apr 2021 14:32:32 +0200 Subject: [PATCH 08/14] typo + TODO-lang + evaluator --- anyeval.md | 1 + css/styles.css | 2 +- evaluator.md | 660 +++++++++++++++++++++++++------------------------ gc.md | 131 +++++----- load.md | 3 +- minimum.md | 3 +- module.md | 3 +- spec.md | 4 +- syntree.md | 3 +- variable.md | 2 +- 10 files changed, 409 insertions(+), 403 deletions(-) diff --git a/anyeval.md b/anyeval.md index 0625f92..e30e0bc 100644 --- a/anyeval.md +++ b/anyeval.md @@ -2,6 +2,7 @@ layout: default title: "Chapter 17: Dynamic evaluation" --- + Chapter 17: Dynamic evaluation ------------------------------ diff --git a/css/styles.css b/css/styles.css index ef54ecc..08d92fd 100644 --- a/css/styles.css +++ b/css/styles.css @@ -130,7 +130,7 @@ figcaption { } /* code inlined in text */ -code.highlighter-rouge { +code.highlighter-rouge, code.inline { background: #33a2; border-radius: 2px; padding: 2px; diff --git a/evaluator.md b/evaluator.md index b051fba..02f4d95 100644 --- a/evaluator.md +++ b/evaluator.md @@ -3,7 +3,8 @@ layout: default title: "Chapter 13: Structure of the evaluator" --- -h1(#chapter). Chapter 13: Structure of the evaluator +Chapter 13: Structure of the evaluator +====================================== Outline ======= @@ -58,7 +59,7 @@ I'd like you to remember. -### The characteristics of @ruby's@ evaluator. +### The characteristics of `ruby`'s evaluator. The biggest characteristic of `ruby`'s evaluator is that, as this is also of @@ -66,7 +67,7 @@ the whole `ruby`'s interpretor, the difference in expressions between the C-level code (extension libraries) and the Ruby-level code is small. 
In ordinary programming languages, the amount of the features of its interpretor we can use from extension -libraries is usually very limited, but there are awfully few limits in @ruby@. +libraries is usually very limited, but there are awfully few limits in `ruby`. Defining classes, defining methods and calling a method without limitation, these can be taken for granted. We can also use exception handling, iterators. Furthermore, threads. @@ -78,28 +79,28 @@ and there are a lot of places implementing the almost same thing twice both for C and Ruby. -Additionally, @ruby@ is a dynamic language, +Additionally, `ruby` is a dynamic language, it means that you can construct and evaluate a string at runtime. -That is @eval@ which is a function-like method. As you expected, +That is `eval` which is a function-like method. As you expected, it is named after "evaluate". By using it, you can even do something like this: -```TODO-lang +```ruby lvar = 1 answer = eval("lvar + lvar") # the answer is 2 ``` -There are also @Module#module_eval@ and @Object#instance_eval@, each method +There are also `Module#module_eval` and `Object#instance_eval`, each method behaves slightly differently. I'll describe about them in detail in Chapter 17: Dynamic evaluation. -### @eval.c@ +### `eval.c` -The evaluator is implemented in @eval.c@. However, this @eval.c@ is a +The evaluator is implemented in `eval.c`. However, this `eval.c` is a really huge file: it has 9000 lines, its size is 200K bytes, and the number of the functions in it is 309. It is hard to fight against. When the size becomes this amount, @@ -114,43 +115,45 @@ I'll list up some ways. The first way is to print the list of the defined functions and look at the -prefixes of them. @rb_dvar_@, @rb_mod_@, @rb_thread@ -- there are plenty of +prefixes of them. `rb_dvar_`, `rb_mod_`, `rb_thread` -- there are plenty of functions with these prefixes. Each prefix clearly indicate a group of the same type of functions. 
Alternatively, as we can tell when looking at the code of the class libraries, -@Init_xxxx()@ is always put at the end of a block in @ruby@. -Therefore, @Init_xxxx()@ also indicates a break between modules. +`Init_xxxx()` is always put at the end of a block in `ruby`. +Therefore, `Init_xxxx()` also indicates a break between modules. Additionally, the names are obviously important, too. -Since @eval()@ and @rb_eval()@ and @eval_node()@ appear close to each other, +Since `eval()` and `rb_eval()` and `eval_node()` appear close to each other, we naturally think there should be a deep relationship among them. -Finally, in the source code of @ruby@, the definitions of types or variables +Finally, in the source code of `ruby`, the definitions of types or variables and the declarations of prototypes often indicate a break between modules. Being aware of these points when looking, -it seems that @eval.c@ can be mainly divided into these modules listed below: +it seems that `eval.c` can be mainly divided into these modules listed below: -| Safe Level | already explained in Chapter 7: Security | +| Module | Definition | +| -------------------------- | --------------------------------------------------------------- | +| Safe Level | already explained in Chapter 7: Security | | Method Entry Manipulations | finding or deleting syntax trees which are actual method bodies | -| Evaluator Core | the heart of the evaluator that @rb_eval()@ is at its center. | -| Exception | generations of exceptions and creations of backtraces | -| Method | the implementation of method call | -| Iterator | the implementation of functions that are related to blocks | -| Load | loading and evaluating external files | -| @Proc@ | the implementation of @Proc@ | -| Thread | the implementation of Ruby threads | - - -Among them, "Load" and "Thread" are the parts that essentially should not be in @eval.c@. -They are in @eval.c@ merely because of the restrictions of C language. 
-To put it more precisely, they need the macros such as @PUSH_TAG@ defined in @eval.c@. +| Evaluator Core | the heart of the evaluator that `rb_eval()` is at its center. | +| Exception | generations of exceptions and creations of backtraces | +| Method | the implementation of method call | +| Iterator | the implementation of functions that are related to blocks | +| Load | loading and evaluating external files | +| `Proc` | the implementation of `Proc` | +| Thread | the implementation of Ruby threads | + + +Among them, "Load" and "Thread" are the parts that essentially should not be in `eval.c`. +They are in `eval.c` merely because of the restrictions of C language. +To put it more precisely, they need the macros such as `PUSH_TAG` defined in `eval.c`. So, I decided to exclude the two topics from Part 3 and deal with them at Part 4. And, it's probably all right if I don't explain the safe level here because I've already done in Part 1. @@ -159,29 +162,30 @@ because I've already done in Part 1. Excluding the above three, the six items are left to be described. The below table shows the corresponding chapter of each of them: - +| Item | Chapter | +| -------------------------- | ------------------------- | | Method Entry Manipulations | the next chapter: Context | -| Evaluator Core | the entire part of Part 3 | -| Exception | this chapter | -| Method | Chapter 15: Methods | -| Iterator | Chapter 16: Blocks | -| Proc | Chapter 16: Blocks | +| Evaluator Core | the entire part of Part 3 | +| Exception | this chapter | +| Method | Chapter 15: Methods | +| Iterator | Chapter 16: Blocks | +| Proc | Chapter 16: Blocks | -### From @main@ by way of @ruby_run@ to @rb_eval@ +### From `main` by way of `ruby_run` to `rb_eval` ### Call Graph -The true core of the evaluator is a function called @rb_eval()@. -In this chapter, we will follow the path from @main()@ to that @rb_eval()@. 
-First of all, here is a rough call graph around @rb_eval@ : +The true core of the evaluator is a function called `rb_eval()`. +In this chapter, we will follow the path from `main()` to that `rb_eval()`. +First of all, here is a rough call graph around `rb_eval` : -```TODO-lang +``` main ....main.c ruby_init ....eval.c ruby_prog_init ....ruby.c @@ -197,31 +201,31 @@ main ....main.c I put the file names on the right side when moving to another file. Gazing this carefully, the first thing we'll notice is that the functions of -@eval.c@ call the functions of @ruby.c@ back. +`eval.c` call the functions of `ruby.c` back. -I wrote it as "calling back" because @main.c@ and @ruby.c@ are relatively for -the implementation of @ruby@ command. @eval.c@ is the implementation of the -evaluator itself which keeps a little distance from @ruby@ command. -In other words, @eval.c@ is supposed to be used by @ruby.c@ and calling the -functions of @ruby.c@ from @eval.c@ makes @eval.c@ less independent. +I wrote it as "calling back" because `main.c` and `ruby.c` are relatively for +the implementation of `ruby` command. `eval.c` is the implementation of the +evaluator itself which keeps a little distance from `ruby` command. +In other words, `eval.c` is supposed to be used by `ruby.c` and calling the +functions of `ruby.c` from `eval.c` makes `eval.c` less independent. Then, why is this in this way? It's mainly because of the restrictions of C language. -Because the functions such as @ruby_prog_init()@ and @ruby_process_options()@ +Because the functions such as `ruby_prog_init()` and `ruby_process_options()` start to use the API of the ruby world, it's possible an exception occurs. However, in order to stop an exception of Ruby, it's necessary to use the macro -named @PUSH_TAG()@ which can only be used in @eval.c@. In other words, essentially, -@ruby_init()@ and @ruby_run()@ should have been defined in @ruby.c@. +named `PUSH_TAG()` which can only be used in `eval.c`. 
In other words, essentially, +`ruby_init()` and `ruby_run()` should have been defined in `ruby.c`. -Then, why isn't @PUSH_TAG@ an @extern@ function or something which is available +Then, why isn't `PUSH_TAG` an `extern` function or something which is available to other files? -Actually, @PUSH_TAG@ can only be used as a pair with @POP_TAG@ as follows: +Actually, `PUSH_TAG` can only be used as a pair with `POP_TAG` as follows: -```TODO-lang +```c PUSH_TAG(); /* do lots of things */ POP_TAG(); @@ -233,48 +237,48 @@ but not in such way because it's slower. The next thing we notice is, the fact that it sequentially calls the functions -named @ruby_xxxx@ from @main()@ seems very meaningful. +named `ruby_xxxx` from `main()` seems very meaningful. Since they are really obviously symmetric, it's odd if there's not any relationship. Actually, these three functions have deep relationships. Simply speaking, all of these three are "built-in Ruby interfaces". That is, they are used only when -creating a command with built-in @ruby@ interpretor and not when writing -extension libraries. Since @ruby@ command itself can be considered as one of +creating a command with built-in `ruby` interpretor and not when writing +extension libraries. Since `ruby` command itself can be considered as one of programs with built-in Ruby in theory, to use these interfaces is natural. -What is the @ruby_@ prefix ? So far, the all of @ruby@ 's functions are prefixed -with @rb_@. Why are there the two types: @rb_@ and @ruby_@? I investigated but +What is the `ruby_` prefix ? So far, the all of `ruby` 's functions are prefixed +with `rb_`. Why are there the two types: `rb_` and `ruby_`? I investigated but could not understand the difference, so I asked directly. 
The answer was, -"@ruby_@ is for the auxiliary functions of @ruby@ command and @rb_@ is for the +"`ruby_` is for the auxiliary functions of `ruby` command and `rb_` is for the official interfaces" -"Then, why are the variables like @ruby_scope@ are @ruby_@?", I asked further. -It seems this is just a coincidence. The variables like @ruby_scope@ -are originally named as @the_xxxx@, but in the middle of the version 1.3 there's -a change to add prefixes to all interfaces. At that time @ruby_@ was added to +"Then, why are the variables like `ruby_scope` are `ruby_`?", I asked further. +It seems this is just a coincidence. The variables like `ruby_scope` +are originally named as `the_xxxx`, but in the middle of the version 1.3 there's +a change to add prefixes to all interfaces. At that time `ruby_` was added to the "may-be-internals-for-some-reasons" variables. -The bottom line is that @ruby_@ is attached to things that support -@ruby@ command or the internal variables and @rb_@ is attached to -the official interfaces of @ruby@ interpretor. +The bottom line is that `ruby_` is attached to things that support +`ruby` command or the internal variables and `rb_` is attached to +the official interfaces of `ruby` interpretor. -### @main()@ +### `main()` -First, straightforwardly, I'll start with @main()@. +First, straightforwardly, I'll start with `main()`. It is nice that this is very short. -

▼ @main()@

+

▼ `main()`

-```TODO-lang +```c 36 int 37 main(argc, argv, envp) 38 int argc; @@ -297,16 +301,16 @@ It is nice that this is very short. ``` -@#if def NT@ is obviously the NT of Windows NT. But somehow NT is also +`#if def NT` is obviously the NT of Windows NT. But somehow NT is also defined in Win9x. So, it means Win32 environment. -@NtInitialize()@ initializes @argc argv@ and the socket system (WinSock) for +`NtInitialize()` initializes `argc argv` and the socket system (WinSock) for Win32. Because this function is only doing the initialization, it's not interesting and not related to the main topic. Thus, I omit this. -And, @__MACOS__@ is not "Ma-Ko-Su" but Mac OS. In this case, it means +And, `__MACOS__` is not "Ma-Ko-Su" but Mac OS. In this case, it means Mac OS 9 and before, and it does not include Mac OS X. Even though such -@#ifdef@ remains, as I wrote at the beginning of this book, the current version +`#ifdef` remains, as I wrote at the beginning of this book, the current version can not run on Mac OS 9 and before. It's just a legacy from when ruby was able to run on it. Therefore, I also omit this code. @@ -314,8 +318,8 @@ able to run on it. Therefore, I also omit this code. By the way, as it is probably known by the readers who are familiar with C language, the identifiers starting with an under bar are reserved for the system libraries or OS. However, although they are called "reserved", using it is almost never result in an error, -but if using a little weird @cc@ it could result in an error. -For example, it is the @cc@ of HP-US. HP-US is an UNIX which @HP@ is creating. +but if using a little weird `cc` it could result in an error. +For example, it is the `cc` of HP-US. HP-US is an UNIX which `HP` is creating. If there's any opinion such as HP-UX is not weird, I would deny it out loud. @@ -327,10 +331,10 @@ Now, I'll start to briefly explain about the built-in Ruby interfaces. -### @ruby_init()@ +### `ruby_init()` -@ruby_init()@ initializes the Ruby interpretor. 
+`ruby_init()` initializes the Ruby interpretor. Since only a single interpretor of the current Ruby can exist in a process, it does not need neither arguments or a return value. This point is generally considered as "lack of features". @@ -339,7 +343,7 @@ This point is generally considered as "lack of features". When there's only a single interpretor, more than anything, things around the development environment should be especially troublesome. -Namely, the applications such as @irb@, RubyWin, and RDE. +Namely, the applications such as `irb`, RubyWin, and RDE. Although loading a rewritten program, the classes which are supposed to be deleted would remain. To counter this with the reflection API is not impossible but requires a lot of efforts. @@ -351,47 +355,47 @@ its reason. For instance, "the loaded extension libraries could not be removed" is taken as an example. -The code of @ruby_init()@ is omitted because it's unnecessary to read. +The code of `ruby_init()` is omitted because it's unnecessary to read. -### @ruby_options()@ +### `ruby_options()` -What to parse command-line options for the Ruby interpreter is @ruby_options()@. +What to parse command-line options for the Ruby interpreter is `ruby_options()`. Of course, depending on the command, we do not have to use this. -Inside this function, @-r@ (load a library) and -@-e@ (pass a program from command-line) are processed. +Inside this function, `-r` (load a library) and +`-e` (pass a program from command-line) are processed. This is also where the file passed as a command-line argument is parsed as a Ruby program. -@ruby@ command reads the main program from a file if it was given, otherwise from @stdin@. -After that, using @rb_compile_string()@ or @rb_compile_file()@ introduced at Part 2, +`ruby` command reads the main program from a file if it was given, otherwise from `stdin`. +After that, using `rb_compile_string()` or `rb_compile_file()` introduced at Part 2, it compiles the text into a syntax tree. 
-The result will be set into the global variable @ruby_eval_tree@. +The result will be set into the global variable `ruby_eval_tree`. -I also omit the code of @ruby_options()@ because it's just doing necessary +I also omit the code of `ruby_options()` because it's just doing necessary things one by one and not interesting. -### @ruby_run()@ +### `ruby_run()` -Finally, @ruby_run()@ starts to evaluate the syntax tree which was set to @ruby_eval_tree@. -We also don't always need to call this function. Other than @ruby_run()@, -for instance, we can evaluate a string by using a function named @rb_eval_string()@. +Finally, `ruby_run()` starts to evaluate the syntax tree which was set to `ruby_eval_tree`. +We also don't always need to call this function. Other than `ruby_run()`, +for instance, we can evaluate a string by using a function named `rb_eval_string()`. -

▼ @ruby_run()@

+

▼ `ruby_run()`

-```TODO-lang +```c 1257 void 1258 ruby_run() 1259 { @@ -418,14 +422,14 @@ for instance, we can evaluate a string by using a function named @rb_eval_string ``` -We can see the macros @PUSH_xxxx()@, but we can ignore them for now. I'll +We can see the macros `PUSH_xxxx()`, but we can ignore them for now. I'll explain about around them later when the time comes. The important thing here -is only @eval_node()@. Its content is: +is only `eval_node()`. Its content is: -

▼ @eval_node()@

+

▼ `eval_node()`

-```TODO-lang +```c 1112 static VALUE 1113 eval_node(self, node) 1114 VALUE self; @@ -445,42 +449,42 @@ is only @eval_node()@. Its content is: (eval.c) ``` -This calls @rb_eval()@ on @ruby_eval_tree@. The @ruby_eval_tree_begin@ is -storing the statements registered by @BEGIN@. But, this is also not important. +This calls `rb_eval()` on `ruby_eval_tree`. The `ruby_eval_tree_begin` is +storing the statements registered by `BEGIN`. But, this is also not important. -And, @ruby_stop()@ inside of @ruby_run()@ terminates all threads and -finalizes all objects and checks exceptions and, in the end, calls @exit()@. +And, `ruby_stop()` inside of `ruby_run()` terminates all threads and +finalizes all objects and checks exceptions and, in the end, calls `exit()`. This is also not important, so we won't see this. -@rb_eval()@ +`rb_eval()` =========== ### Outline -Now, @rb_eval()@. This function is exactly the real core of @ruby@. -One @rb_eval()@ call processes a single @NODE@, and the whole syntax tree will +Now, `rb_eval()`. This function is exactly the real core of `ruby`. +One `rb_eval()` call processes a single `NODE`, and the whole syntax tree will be processed by calling recursively. (Fig.1) -

-(rbeval)
-Fig.1: @rb_eval@ -

+
+ figure 1: `rb_eval` +
figure 1: rb_eval
+
-@rb_eval@ is, as the same as @yylex()@, made of a huge switch statement and +`rb_eval` is, as the same as `yylex()`, made of a huge switch statement and branching by each type of the nodes. First, let's look at the outline. -

▼ @rb_eval()@ Outline

+

▼ `rb_eval()` Outline

-```TODO-lang +```c 2221 static VALUE 2222 rb_eval(self, n) 2223 VALUE self; @@ -524,37 +528,37 @@ branching by each type of the nodes. First, let's look at the outline. In the omitted part, plenty of the codes to process all nodes are listed. By branching like this, it processes each node. When the code is only a few, -it will be processed in @rb_eval()@. But when it becoming many, it will be a -separated function. Most of functions in @eval.c@ are created in this way. +it will be processed in `rb_eval()`. But when it becoming many, it will be a +separated function. Most of functions in `eval.c` are created in this way. -When returning a value from @rb_eval()@, it uses the macro @RETURN()@ instead -of @return@, in order to always pass through @CHECK_INTS@. Since this macro is +When returning a value from `rb_eval()`, it uses the macro `RETURN()` instead +of `return`, in order to always pass through `CHECK_INTS`. Since this macro is related to threads, you can ignore this until the chapter about it. -And finally, the local variables @result@ and @node@ are @volatile@ for GC. +And finally, the local variables `result` and `node` are `volatile` for GC. -### @NODE_IF@ +### `NODE_IF` -Now, taking the @if@ statement as an example, let's look at the process of -the @rb_eval()@ evaluation concretely. -From here, in the description of @rb_eval()@, +Now, taking the `if` statement as an example, let's look at the process of +the `rb_eval()` evaluation concretely. +From here, in the description of `rb_eval()`, * The source code (a Ruby program) * Its corresponding syntax tree -* The partial code of @rb_eval()@ to process the node. +* The partial code of `rb_eval()` to process the node. these three will be listed at the beginning.

▼source program

-```TODO-lang +```ruby if true 'true expr' else @@ -563,9 +567,9 @@ end ``` -

▼ its corresponding syntax tree ( @nodedump@ )

+

▼ its corresponding syntax tree (`nodedump`)

-```TODO-lang +``` NODE_NEWLINE nd_file = "if" nd_nth = 1 @@ -590,13 +594,13 @@ nd_next: ``` -As we've seen in Part 2, @elsif@ and @unless@ can be, by contriving the ways to assemble, -bundled to a single @NODE_IF@ type, so we don't have to treat them specially. +As we've seen in Part 2, `elsif` and `unless` can be, by contriving the ways to assemble, +bundled to a single `NODE_IF` type, so we don't have to treat them specially. -

▼ @rb_eval()@ − @NODE_IF@

+

▼ `rb_eval()` − `NODE_IF`

-```TODO-lang +```c 2324 case NODE_IF: 2325 if (trace_func) { 2326 call_trace_func("line", node, self, @@ -615,12 +619,12 @@ bundled to a single @NODE_IF@ type, so we don't have to treat them specially. ``` -Only the last @if@ statement is important. +Only the last `if` statement is important. If rewriting it without any change in its meaning, it becomes this: -```TODO-lang +```c if (RTEST(rb_eval(self, node->nd_cond))) { (A) RETURN(rb_eval(self, node->nd_body)); (B) } @@ -631,36 +635,36 @@ else { First, at (A), evaluating (the node of) the Ruby's condition statement and -testing its value with @RTEST()@. -I've mentioned that @RTEST()@ is a macro to test whether or not -a @VALUE@ is true of Ruby. -If that was true, evaluating the @then@ side clause at (B). -If false, evaluating the @else@ side clause at ==(C)==. +testing its value with `RTEST()`. +I've mentioned that `RTEST()` is a macro to test whether or not +a `VALUE` is true of Ruby. +If that was true, evaluating the `then` side clause at (B). +If false, evaluating the `else` side clause at ==(C)==. -In addition, I've mentioned that @if@ statement of Ruby also has its own value, +In addition, I've mentioned that `if` statement of Ruby also has its own value, so it's necessary to return a value. -Since the value of an @if@ is the value of either the @then@ side or the @else@ -side which is the one executed, returning it by using the macro @RETURN()@. +Since the value of an `if` is the value of either the `then` side or the `else` +side which is the one executed, returning it by using the macro `RETURN()`. -In the original list, it does not call @rb_eval()@ recursively but just does @goto@. -This is the "conversion from tail recursion to @goto@ " which has also appeared +In the original list, it does not call `rb_eval()` recursively but just does `goto`. +This is the "conversion from tail recursion to `goto` " which has also appeared in the previous chapter "Syntax tree construction". 
-### @NODE_NEW_LINE@ +### `NODE_NEWLINE` -Since there was @NODE_NEWLINE@ at the node for a @if@ statement, +Since there was `NODE_NEWLINE` at the node for an `if` statement, let's look at the code for it. -

▼ @rb_eval()@ - @NODE_NEWLINE@

+

▼ `rb_eval()` - `NODE_NEWLINE`

-```TODO-lang +```c 3404 case NODE_NEWLINE: 3405 ruby_sourcefile = node->nd_file; 3406 ruby_sourceline = node->nd_nth; @@ -679,20 +683,20 @@ let's look at the code for it. There's nothing particularly difficult. -@call_trace_func()@ has already appeared at @NODE_IF@. Here is a simple +`call_trace_func()` has already appeared at `NODE_IF`. Here is a simple explanation of what kind of thing it is. This is a feature to trace a Ruby -program from Ruby level. The debugger ( @debug.rb@ ) and the tracer ( @tracer.rb@ ) -and the profiler ( @profile.rb@ ) and @irb@ (interactive @ruby@ command) and more +program from Ruby level. The debugger ( `debug.rb` ) and the tracer ( `tracer.rb` ) +and the profiler ( `profile.rb` ) and `irb` (interactive `ruby` command) and more are using this feature. -By using the function-like method @set_trace_func@ you can register a @Proc@ -object to trace, and that @Proc@ object is stored into @trace_func@. If -@trace_func@ is not 0, it means not @QFalse@, it will be considered as a @Proc@ -object and executed (at @call_trace_func()@ ). +By using the function-like method `set_trace_func` you can register a `Proc` +object to trace, and that `Proc` object is stored into `trace_func`. If +`trace_func` is not 0, it means not `QFalse`, it will be considered as a `Proc` +object and executed (at `call_trace_func()` ). -This @call_trace_func()@ has nothing to do with the main topic and not so +This `call_trace_func()` has nothing to do with the main topic and not so interesting as well. Therefore in this book, from now on, I'll completely ignore it. If you are interested in it, I'd like you to challenge after finishing the Chapter 16: Blocks. @@ -703,13 +707,13 @@ challenge after finishing the Chapter 16: Blocks. ### Pseudo-local Variables -@NODE_IF@ and such are interior nodes in a syntax tree. +`NODE_IF` and such are interior nodes in a syntax tree. Let's look at the leaves, too. -

▼ @rb_eval()@ Ppseudo-Local Variable Nodes

+

▼ `rb_eval()` Pseudo-Local Variable Nodes

-```TODO-lang +```c 2312 case NODE_SELF: 2313 RETURN(self); 2314 @@ -726,7 +730,7 @@ Let's look at the leaves, too. ``` -We've seen @self@ as the argument of @rb_eval()@. I'd like you to make sure it +We've seen `self` as the argument of `rb_eval()`. I'd like you to make sure it by going back a little. The others are probably not needed to be explained. @@ -736,24 +740,24 @@ The others are probably not needed to be explained. ### Jump Tag -Next, I'd like to explain @NODE_WHILE@ which is corresponding to @while@, -but to implement @break@ or @next@ only with recursive calls of a function is difficult. -Since @ruby@ enables these syntaxes by using what named "jump tag", +Next, I'd like to explain `NODE_WHILE` which is corresponding to `while`, +but to implement `break` or `next` only with recursive calls of a function is difficult. +Since `ruby` enables these syntaxes by using what named "jump tag", I'll start with describing it first. -Simply put, "jump tag" is a wrapper of @setjmp()@ and @longjump()@ which are -library functions of C language. Do you know about @setjmp()@? -This function has already appeared at @gc.c@, +Simply put, "jump tag" is a wrapper of `setjmp()` and `longjump()` which are +library functions of C language. Do you know about `setjmp()`? +This function has already appeared at `gc.c`, but it is used in very abnormal way there. -@setjmp()@ is usually used to jump over functions. +`setjmp()` is usually used to jump over functions. I'll explain by taking the below code as an example. -The entry point is @parent()@. +The entry point is `parent()`. -

▼ @setjmp()@ and @longjmp()@

+

▼ `setjmp()` and `longjmp()`

-```TODO-lang +```c jmp_buf buf; void child2(void) { @@ -780,40 +784,40 @@ void parent(void) { ``` -First, when @setjmp()@ is called at @parent()@, -the executing state at the time is saved to the argument @buf@. +First, when `setjmp()` is called at `parent()`, +the executing state at the time is saved to the argument `buf`. To put it a little more directly, the address of the top of the machine stack and the CPU registers are saved. -If the return value of @setjmp()@ was 0, it means it normally returned from @setjmp()@, +If the return value of `setjmp()` was 0, it means it normally returned from `setjmp()`, thus you can write the subsequent code as usual. -This is the @if@ side. Here, it calls @child1()@. +This is the `if` side. Here, it calls `child1()`. -Next, the control moves to @child2()@ and calls @longjump@, -then it can go back straight to the place where the argument @buf@ was @setjmp@ ed. -So in this case, it goes back to the @setjmp@ at @parent()@. -When coming back via @longjmp@, the return value of @setjmp@ becomes -the value of the second argument of @longjmp@, so the @else@ side is executed. -And, even if we pass 0 to @longjmp@, +Next, the control moves to `child2()` and calls `longjump`, +then it can go back straight to the place where the argument `buf` was `setjmp` ed. +So in this case, it goes back to the `setjmp` at `parent()`. +When coming back via `longjmp`, the return value of `setjmp` becomes +the value of the second argument of `longjmp`, so the `else` side is executed. +And, even if we pass 0 to `longjmp`, it will be forced to be another value. Thus it's fruitless. Fig.2 shows the state of the machine stack. The ordinary functions return only once for each call. -However, it's possible @setjmp()@ returns twice. -Is it helpful to grasp the concept if I say that it is something like @fork()@? +However, it's possible `setjmp()` returns twice. +Is it helpful to grasp the concept if I say that it is something like `fork()`? -

+

TODO (setjmp)
-Fig.2: @setjmp()@ @longjmp()@ Image +Fig.2: `setjmp()` `longjmp()` Image

-Now, we've learned about @setjmp()@ as a preparation. -In @eval.c@, @EXEC_TAG@ corresponds to @setjmp()@ and @JUMP_TAG()@ corresponds -to @longjmp()@ respectively. (Fig.3) +Now, we've learned about `setjmp()` as a preparation. +In `eval.c`, `EXEC_TAG` corresponds to `setjmp()` and `JUMP_TAG()` corresponds +to `longjmp()` respectively. (Fig.3)

@@ -822,15 +826,15 @@ Fig.3: "tag jump" image

-Take a look at this image, it seems that @EXEC_TAG()@ does not have any arguments. -Where has @jmp_buf@ gone? -Actually, in @ruby@, @jmp_buf@ is wrapped by the struct @struct tag@. +Take a look at this image, it seems that `EXEC_TAG()` does not have any arguments. +Where has `jmp_buf` gone? +Actually, in `ruby`, `jmp_buf` is wrapped by the struct `struct tag`. Let's look at it. -

▼ @struct tag@

+

▼ `struct tag`

-```TODO-lang +```c 783 struct tag { 784 jmp_buf buf; 785 struct FRAME *frame; /* FRAME when PUSH_TAG */ @@ -846,14 +850,14 @@ Let's look at it. ``` -Because there's the member @prev@, we can infer that @struct tag@ is probably +Because there's the member `prev`, we can infer that `struct tag` is probably a stack structure using a linked list. Moreover, by looking around it, we can -find the macros @PUSH_TAG()@ and @POP_TAG@, thus it definitely seems a stack. +find the macros `PUSH_TAG()` and `POP_TAG`, thus it definitely seems a stack. -

▼ @PUSH_TAG() POP_TAG()@

+

▼ `PUSH_TAG() POP_TAG()`

-```TODO-lang +```c 793 static struct tag *prot_tag; /* the pointer to the head of the machine stack */ 795 #define PUSH_TAG(ptag) do { \ @@ -878,14 +882,14 @@ find the macros @PUSH_TAG()@ and @POP_TAG@, thus it definitely seems a stack. I'd like you to be flabbergasted here because the actual tag is fully allocated -at the machine stack as a local variable. (Fig.4). Moreover, @do@ ~ @while@ is +at the machine stack as a local variable. (Fig.4). Moreover, `do` ~ `while` is divided between the two macros. This might be one of the most awful usages of the C preprocessor. -Here is the macros @PUSH@ / @POP@ coupled and extracted to make it easy to read. +Here is the macros `PUSH` / `POP` coupled and extracted to make it easy to read. -```TODO-lang +```c do { struct tag _tag; _tag.prev = prot_tag; /* save the previous tag */ @@ -898,8 +902,8 @@ do { This method does not have any overhead of function calls, and its cost of the memory allocation is next to nothing. -This technique is only possible because the @ruby@ evaluator is made of -recursive calls of @rb_eval()@. +This technique is only possible because the `ruby` evaluator is made of +recursive calls of `rb_eval()`. @@ -909,18 +913,18 @@ Fig.4: the tag stack is embedded in the machine stack

-Because of this implementation, it's necessary that @PUSH_TAG@ and @POP_TAG@ +Because of this implementation, it's necessary that `PUSH_TAG` and `POP_TAG` are in the same one function as a pair. Plus, since it's not supposed to be carelessly used at the outside of the evaluator, we can't make them available to other files. -Additionally, let's also take a look at @EXEC_TAG()@ and @JUMP_TAG()@. +Additionally, let's also take a look at `EXEC_TAG()` and `JUMP_TAG()`. -

▼ @EXEC_TAG() JUMP_TAG()@

+

▼ `EXEC_TAG() JUMP_TAG()`

-```TODO-lang +```c 810 #define EXEC_TAG() setjmp(prot_tag->buf) 812 #define JUMP_TAG(st) do { \ @@ -933,43 +937,43 @@ Additionally, let's also take a look at @EXEC_TAG()@ and @JUMP_TAG()@. ``` -In this way, @setjmp@ and @longjmp@ are wrapped by @EXEC_TAG()@ and @JUMP_TAG()@ respectively. -The name @EXEC_TAG()@ can look like a wrapper of @longjmp()@ at first sight, -but this one is to execute @setjmp()@. +In this way, `setjmp` and `longjmp` are wrapped by `EXEC_TAG()` and `JUMP_TAG()` respectively. +The name `EXEC_TAG()` can look like a wrapper of `longjmp()` at first sight, +but this one is to execute `setjmp()`. -Based on all of the above, I'll explain the mechanism of @while@. -First, when starting @while@ it does @EXEC_TAG()@ ( @setjmp@ ). -After that, it executes the main body by calling @rb_eval()@ -recursively. If there's @break@ or @next@, it does @JUMP_TAG()@ ( @longjmp@ ). -Then, it can go back to the start point of the @while@ loop. (Fig.5) +Based on all of the above, I'll explain the mechanism of `while`. +First, when starting `while` it does `EXEC_TAG()` (`setjmp`). +After that, it executes the main body by calling `rb_eval()` +recursively. If there's `break` or `next`, it does `JUMP_TAG()` (`longjmp`). +Then, it can go back to the start point of the `while` loop. (Fig.5) -

+

TODO (whilejmp)
-Fig.5: the implementation of @while@ by using "tag jump" +Fig.5: the implementation of `while` by using "tag jump"

-Though @break@ was taken as an example here, what cannot be implemented without -jumping is not only @break@. Even if we limit the case to @while@, -there are @next@ and @redo@. -Additionally, @return@ from a method and exceptions also should have to -climb over the wall of @rb_eval()@. +Though `break` was taken as an example here, what cannot be implemented without +jumping is not only `break`. Even if we limit the case to `while`, +there are `next` and `redo`. +Additionally, `return` from a method and exceptions also should have to +climb over the wall of `rb_eval()`. And since it's cumbersome to use a different tag stack for each case, we want for only one stack to handle all cases in one way or another. What we need to make it possible is just attaching information about "what the purpose of this jump is". -Conveniently, the return value of @setjmp()@ could be specified as the argument -of @longjmp()@, thus we can use this. The types are expressed by the following flags: +Conveniently, the return value of `setjmp()` could be specified as the argument +of `longjmp()`, thus we can use this. The types are expressed by the following flags:

▼tag type

-```TODO-lang +```c 828 #define TAG_RETURN 0x1 /* return */ 829 #define TAG_BREAK 0x2 /* break */ 830 #define TAG_NEXT 0x3 /* next */ @@ -984,32 +988,32 @@ of @longjmp()@, thus we can use this. The types are expressed by the following f ``` -The meanings are written as each comment. The last @TAG_MASK@ is the bitmask to -take out these flags from a return value of @setjmp()@. This is because the -return value of @setjmp()@ can also include information which is not about a +The meanings are written as each comment. The last `TAG_MASK` is the bitmask to +take out these flags from a return value of `setjmp()`. This is because the +return value of `setjmp()` can also include information which is not about a "type of jump". -### @NODE_WHILE@ +### `NODE_WHILE` -Now, by examining the code of @NODE_WHILE@, let's check the actual usage of tags. +Now, by examining the code of `NODE_WHILE`, let's check the actual usage of tags.

▼ The Source Program

-```TODO-lang +```ruby while true 'true_expr' end ``` -

▼ Its corresponding syntax tree( @nodedump-short@ )

+

▼ Its corresponding syntax tree (`nodedump-short`)

-```TODO-lang +``` NODE_WHILE nd_state = 1 (while) nd_cond: @@ -1020,9 +1024,9 @@ nd_body: ``` -

▼ @rb_eval@ - @NODE_WHILE@

+

▼ `rb_eval` - `NODE_WHILE`

-```TODO-lang +```c 2418 case NODE_WHILE: 2419 PUSH_TAG(PROT_NONE); 2420 result = Qnil; @@ -1063,7 +1067,7 @@ The idiom which will appear over and over again appeared in the above code. -```TODO-lang +```c PUSH_TAG(PROT_NONE); switch (state = EXEC_TAG()) { case 0: @@ -1085,25 +1089,25 @@ if (state) JUMP_TAG(state); /* .. jump again here */ ``` -First, as @PUSH_TAG()@ and @POP_TAG()@ are the previously described mechanism, +First, as `PUSH_TAG()` and `POP_TAG()` are the previously described mechanism, it's necessary to be used always as a pair. Also, they need to be written -outside of @EXEC_TAG()@. And, apply @EXEC_TAG()@ to the just pushed @jmp_buf@. -This means doing @setjmp()@. -If the return value is 0, since it means immediately returning from @setjmp()@, -it does the normal processing (this usually contains @rb_eval()@ ). -If the return value of @EXEC_TAG()@ is not 0, since it means returning via @longjmp()@, -it filters only the own necessary jumps by using @case@ and -lets the rest ( @default@ ) pass. +outside of `EXEC_TAG()`. And, apply `EXEC_TAG()` to the just pushed `jmp_buf`. +This means doing `setjmp()`. +If the return value is 0, since it means immediately returning from `setjmp()`, +it does the normal processing (this usually contains `rb_eval()` ). +If the return value of `EXEC_TAG()` is not 0, since it means returning via `longjmp()`, +it filters only the own necessary jumps by using `case` and +lets the rest ( `default` ) pass. It might be helpful to see also the code of the jumping side. -The below code is the handler of the node of @redo@. +The below code is the handler of the node of `redo`. -

▼ @rb_eval()@ - @NODE_REDO@

+

▼ `rb_eval()` - `NODE_REDO`

-```TODO-lang +```c 2560 case NODE_REDO: 2561 CHECK_INTS; 2562 JUMP_TAG(TAG_REDO); @@ -1113,19 +1117,19 @@ The below code is the handler of the node of @redo@. ``` -As a result of jumping via @JUMP_TAG()@, it goes back to the last @EXEC_TAG()@. -The return value at the time is the argument @TAG_REDO@. Being aware of this, -I'd like you to look at the code of @NODE_WHILE@ and check what route is taken. +As a result of jumping via `JUMP_TAG()`, it goes back to the last `EXEC_TAG()`. +The return value at the time is the argument `TAG_REDO`. Being aware of this, +I'd like you to look at the code of `NODE_WHILE` and check what route is taken. -The idiom has enough explained, now I'll explain about the code of @NODE_WHILE@ -a little more in detail. As mentioned, since the inside of @case 0:@ is the main +The idiom has enough explained, now I'll explain about the code of `NODE_WHILE` +a little more in detail. As mentioned, since the inside of `case 0:` is the main process, I extracted only that part. Additionally, I moved some labels to enhance readability. -```TODO-lang +```c if (node->nd_state && !RTEST(rb_eval(self, node->nd_cond))) goto while_out; do { @@ -1135,22 +1139,22 @@ while_out: ``` -There are the two places calling @rb_eval()@ on @node->nd_state@ which +There are the two places calling `rb_eval()` on `node->nd_state` which corresponds to the conditional statement. It seems that only the first test of -the condition is separated. This is to deal with both @do@ ~ @while@ and @while@ -at once. When @node->nd_state@ is 0 it is a @do@ ~ @while@, when 1 it is an -ordinary @while@. The rest might be understood by following step-by-step, +the condition is separated. This is to deal with both `do` ~ `while` and `while` +at once. When `node->nd_state` is 0 it is a `do` ~ `while`, when 1 it is an +ordinary `while`. The rest might be understood by following step-by-step, I won't particularly explain. 
-By the way, I feel like it easily becomes an infinite loop if there is @next@ -or @redo@ in the condition statement. Since it is of course exactly what the +By the way, I feel like it easily becomes an infinite loop if there is `next` +or `redo` in the condition statement. Since it is of course exactly what the code means, it's the fault of who wrote it, but I'm a little curious about it. So, I've actually tried it. -```TODO-lang +``` % ruby -e 'while next do nil end' -e:1: void value expression ``` @@ -1158,23 +1162,23 @@ So, I've actually tried it. It's simply rejected at the time of parsing. It's safe but not an interesting result. -What produces this error is @value_expr()@ of @parse.y@. +What produces this error is `value_expr()` of `parse.y`. -### The value of an evaluation of @while@ +### The value of an evaluation of `while` -@while@ had not had its value for a long time, but it has been able to return -a value by using @break@ since @ruby@ 1.7. +`while` had not had its value for a long time, but it has been able to return +a value by using `break` since `ruby` 1.7. This time, let's focus on the flow of the value of an evaluation. -Keeping in mind that the value of the local variable @result@ becomes the -return value of @rb_eval()@, I'd like you to look at the following code: +Keeping in mind that the value of the local variable `result` becomes the +return value of `rb_eval()`, I'd like you to look at the following code: -```TODO-lang +```c result = Qnil; switch (state = EXEC_TAG()) { case 0: @@ -1194,13 +1198,13 @@ return value of @rb_eval()@, I'd like you to look at the following code: What we should focus on is only (A). The return value of the jump seems to be -passed via @prot_tag->retval@ which is a @struct tag@. +passed via `prot_tag->retval` which is a `struct tag`. Here is the passing side: -

▼ @rb_eval()@ - @NODE_BREAK@

+

▼ `rb_eval()` - `NODE_BREAK`

-```TODO-lang +```c 2219 #define return_value(v) prot_tag->retval = (v) 2539 case NODE_BREAK: @@ -1217,17 +1221,17 @@ Here is the passing side: ``` -In this way, by using the macro @return_value()@, it assigns the value to the +In this way, by using the macro `return_value()`, it assigns the value to the struct of the top of the tag stack. -The basic flow is this, but in practice there could be another @EXEC_TAG@ -between @EXEC_TAG()@ of @NODE_WHILE@ and @JUMP_TAG()@ of @NODE_BREAK@. -For example, @rescue@ of an exception handling can exist between them. +The basic flow is this, but in practice there could be another `EXEC_TAG` +between `EXEC_TAG()` of `NODE_WHILE` and `JUMP_TAG()` of `NODE_BREAK`. +For example, `rescue` of an exception handling can exist between them. -```TODO-lang +```ruby while cond # EXEC_TAG() for NODE_WHILE begin # EXEC_TAG() again for rescue break 1 @@ -1237,15 +1241,15 @@ end ``` -Therefore, it's hard to determine whether or not the @strict tag@ of when doing -@JUMP_TAG()@ at @NODE_BREAK@ is the one which was pushed at @NODE_WHILE@. -In this case, because @retval@ is propagated in @POP_TAG()@ as shown below, +Therefore, it's hard to determine whether or not the `struct tag` of when doing +`JUMP_TAG()` at `NODE_BREAK` is the one which was pushed at `NODE_WHILE`. +In this case, because `retval` is propagated in `POP_TAG()` as shown below, the return value can be passed to the next tag without particular thought. -

▼ @POP_TAG()@

+

▼ `POP_TAG()`

-```TODO-lang +```c 818 #define POP_TAG() \ 819 if (_tag.prev) \ 820 _tag.prev->retval = _tag.retval;\ @@ -1278,17 +1282,17 @@ As the second example of the usage of "tag jump", we'll look at how exceptions are dealt with. -### @raise@ +### `raise` -When I explained @while@, we looked at the @setjmp()@ side first. This time, -we'll look at the @longjmp()@ side first for a change. It's @rb_exc_raise()@ -which is the substance of @raise@. +When I explained `while`, we looked at the `setjmp()` side first. This time, +we'll look at the `longjmp()` side first for a change. It's `rb_exc_raise()` +which is the substance of `raise`. -

▼ @rb_exc_raise()@

+

▼ `rb_exc_raise()`

-```TODO-lang +```c 3645 void 3646 rb_exc_raise(mesg) 3647 VALUE mesg; @@ -1300,14 +1304,14 @@ which is the substance of @raise@. ``` -@mesg@ is an exception object (an instance of @Exception@ or one of its subclass). -Notice that It seems to jump with @TAG_RAISE@ this time. -And the below code is very simplified @rb_longjmp()@. +`mesg` is an exception object (an instance of `Exception` or one of its subclasses). +Notice that it seems to jump with `TAG_RAISE` this time. +And the below code is very simplified `rb_longjmp()`. -

▼ @rb_longjmp()@ (simplified)

+

▼ `rb_longjmp()` (simplified)

-```TODO-lang +```c static void rb_longjmp(tag, mesg) int tag; @@ -1323,13 +1327,13 @@ rb_longjmp(tag, mesg) Well, though this can be considered as a matter of course, this is just to jump -as usual by using @JUMP_TAG()@. +as usual by using `JUMP_TAG()`. -What is @ruby_errinfo@? By doing @grep@ a few times, I figured out that this -variable is the substance of the global variable @$!@ of Ruby. +What is `ruby_errinfo`? By doing `grep` a few times, I figured out that this +variable is the substance of the global variable `$!` of Ruby. Since this variable indicates the exception which is currently occurring, -naturally its substance @ruby_errinfo@ should have the same meaning as well. +naturally its substance `ruby_errinfo` should have the same meaning as well. @@ -1339,7 +1343,7 @@ naturally its substance @ruby_errinfo@ should have the same meaning as well.

▼the source program

-```TODO-lang +```ruby begin raise('exception raised') rescue @@ -1350,9 +1354,9 @@ end ``` -

▼the syntax tree( @nodedump-short@ )

+

▼the syntax tree (`nodedump-short`)

-```TODO-lang +``` NODE_BEGIN nd_body: NODE_ENSURE @@ -1381,16 +1385,16 @@ nd_body: ``` -As the right order of @rescue@ and @ensure@ is decided at parser level, +As the right order of `rescue` and `ensure` is decided at parser level, the right order is -strictly decided at syntax tree as well. @NODE_ENSURE@ is always at the "top", -@NODE_RESCUE@ comes next, the main body (where @raise@ exist) is the last. -Since @NODE_BEGIN@ is a node to do nothing, you can consider @NODE_ENSURE@ is +strictly decided at syntax tree as well. `NODE_ENSURE` is always at the "top", +`NODE_RESCUE` comes next, the main body (where `raise` exist) is the last. +Since `NODE_BEGIN` is a node to do nothing, you can consider `NODE_ENSURE` is virtually on the top. -This means, since @NODE_ENSURE@ and @NODE_RESCUE@ are above the main body which -we want to protect, we can stop @raise@ by merely doing @EXEC_TAG()@. Or rather, +This means, since `NODE_ENSURE` and `NODE_RESCUE` are above the main body which +we want to protect, we can stop `raise` by merely doing `EXEC_TAG()`. Or rather, the two nodes are put above in syntax tree for this purpose, is probably more accurate to say. @@ -1398,15 +1402,15 @@ accurate to say. -### @ensure@ +### `ensure` -We are going to look at the handler of @NODE_ENSURE@ which is the node of @ensure@. +We are going to look at the handler of `NODE_ENSURE` which is the node of `ensure`. -

▼ @rb_eval()@ - @NODE_ENSURE@

+

▼ `rb_eval()` - `NODE_ENSURE`

-```TODO-lang +```c 2634 case NODE_ENSURE: 2635 PUSH_TAG(PROT_NONE); 2636 if ((state = EXEC_TAG()) == 0) { @@ -1428,9 +1432,9 @@ We are going to look at the handler of @NODE_ENSURE@ which is the node of @ensur ``` -This branch using @if@ is another idiom to deal with tag. -It interrupts a jump by doing @EXEC_TAG()@ then evaluates the @ensure@ clause ( -( @node->nd_ensr@ ). As for the flow of the process, it's probably straightforward. +This branch using `if` is another idiom to deal with tag. +It interrupts a jump by doing `EXEC_TAG()` then evaluates the `ensure` clause +(`node->nd_ensr`). As for the flow of the process, it's probably straightforward. Again, we'll try to think about the value of an evaluation. @@ -1438,7 +1442,7 @@ To check the specification first, -```TODO-lang +```ruby begin expr0 ensure @@ -1447,33 +1451,33 @@ end ``` -for the above statement, the value of the whole @begin@ will be the value of -@expr0@ regardless of whether or not @ensure@ exists. +for the above statement, the value of the whole `begin` will be the value of +`expr0` regardless of whether or not `ensure` exists. This behavior is reflected to the code (A-1,2), -so the value of the evaluation of an @ensure@ clause is completely discarded. +so the value of the evaluation of an `ensure` clause is completely discarded. At (B-1,3), it deals with the evaluated value of when a jump occurred at the main body. -I mentioned that the value of this case is stored in @prot_tag->retval@, +I mentioned that the value of this case is stored in `prot_tag->retval`, so it saves the value to a local variable to prevent from being carelessly -overwritten during the execution of the @ensure@ clause (B-1). -After the evaluation of the @ensure@ clause, it restores the value by using -@return_value()@ (B-2). -When any jump has not occurred, @state==0@ in this case, -@prot_tag->retval@ is not used in the first place. +overwritten during the execution of the `ensure` clause (B-1).
+After the evaluation of the `ensure` clause, it restores the value by using +`return_value()` (B-2). +When any jump has not occurred, `state==0` in this case, +`prot_tag->retval` is not used in the first place. -### @rescue@ +### `rescue` -It's been a little while, I'll show the syntax tree of @rescue@ again just in case. +It's been a little while, I'll show the syntax tree of `rescue` again just in case.

▼Source Program

-```TODO-lang +```ruby begin raise() rescue ArgumentError, TypeError @@ -1482,9 +1486,9 @@ end ``` -

▼ Its Syntax Tree ( @nodedump-short@ )

+

▼ Its Syntax Tree (`nodedump-short`)

-```TODO-lang +``` NODE_BEGIN nd_body: NODE_RESCUE @@ -1512,12 +1516,12 @@ nd_body: I'd like you to make sure that (the syntax tree of) the statement to be -@rescue@ ed is "under" @NODE_RESCUE@. +`rescue` ed is "under" `NODE_RESCUE`. -

▼ @rb_eval()@ - @NODE_RESCUE@

+

▼ `rb_eval()` - `NODE_RESCUE`

-```TODO-lang +```c 2590 case NODE_RESCUE: 2591 retry_entry: 2592 { @@ -1568,19 +1572,19 @@ I'd like you to make sure that (the syntax tree of) the statement to be Even though the size is not small, it's not difficult because it only simply deal with the nodes one by one. -This is the first time @handle_rescue()@ appeared, +This is the first time `handle_rescue()` appeared, but for some reasons we cannot look at this function now. I'll explain only its effects here. Its prototype is this, -```TODO-lang +```c static int handle_rescue(VALUE self, NODE *resq) ``` -and it determines whether the currently occurring exception (@ruby_errinfo@) is -a subclass of the class that is expressed by @resq@ (@TypeError@, for instance). -The reason why passing @self@ is that it's necessary to call @rb_eval()@ inside -this function in order to evaluate @resq@. +and it determines whether the currently occurring exception (`ruby_errinfo`) is +a subclass of the class that is expressed by `resq` (`TypeError`, for instance). +The reason why passing `self` is that it's necessary to call `rb_eval()` inside +this function in order to evaluate `resq`. diff --git a/gc.md b/gc.md index ebe9b92..093c59d 100644 --- a/gc.md +++ b/gc.md @@ -76,12 +76,12 @@ But in that case, only the feature that we don't have to free it by ourselves is implemented and it does not necessarily allocate the memory on the machine stack. In fact, it often does not. If it were possible, a native -@alloca()@ could have been implemented in the first place. +`alloca()` could have been implemented in the first place. -How can one implement @alloca()@ in C? The simplest implementation is: -first allocate memory normally with @malloc()@. Then remember the pair of the function -which called @alloca()@ and the assigned addresses in a global list. -After that, check this list whenever @alloca()@ is called, +How can one implement `alloca()` in C? The simplest implementation is: +first allocate memory normally with `malloc()`. 
Then remember the pair of the function +which called `alloca()` and the assigned addresses in a global list. +After that, check this list whenever `alloca()` is called, if there are the memories allocated for the functions already finished, free them by using `free()`. @@ -92,7 +92,7 @@ free them by using `free()`. -The @missing/alloca.c@ of @ruby@ is an example of an emulated @alloca()@ . +The `missing/alloca.c` of `ruby` is an example of an emulated `alloca()` . Overview @@ -106,8 +106,8 @@ garbage collection. Objects are normally on top of the memory. Naturally, if a lot of objects are created, a lot of memory is used. If memory were infinite there would be no problem, but in reality there is always a memory limit. That's why the memory which is not -used anymore must be collected and recycled. More concretely the memory received through @malloc()@ must be returned with -@free()@. +used anymore must be collected and recycled. More concretely the memory received through `malloc()` must be returned with +`free()`. However, it would require a lot of efforts if the management of `malloc()` and `free()` were entirely left to programmers. @@ -214,7 +214,7 @@ There are also two disadvantages. * In order to sweep every object must be touched at least once. * The load of the GC is concentrated at one point. -When using the emacs editor, there sometimes appears " @Garbage collecting...@ " +When using the emacs editor, there sometimes appears _Garbage collecting..._ and it completely stops reacting. That is an example of the second disadvantage. But this point can be alleviated by modifying the algorithm (it is called incremental GC). @@ -223,12 +223,12 @@ But this point can be alleviated by modifying the algorithm (it is called increm ### Stop and Copy Stop and Copy is a variation of Mark and Sweep. First, prepare several object -areas. To simplify this description, assume there are two areas @A@ and @B@ here. +areas. 
To simplify this description, assume there are two areas `A` and `B` here. And put an "active" mark on the one of the areas. When creating an object, create it only in the "active" one. (Figure 5)
- figure 5: Stop and Copy (1 + figure 5: Stop and Copy (1)
figure 5: Stop and Copy (1
@@ -236,10 +236,10 @@ When creating an object, create it only in the "active" one. (Figure 5) When the GC starts, follow links from the roots in the same manner as mark-and-sweep. However, move objects to another area instead of marking them (Figure 6). When all the links have been followed, discard the all elements -which remain in @A@, and make @B@ active next. +which remain in `A`, and make `B` active next.
- figure 6: Stop and Copy (2 + figure 6: Stop and Copy (2)
figure 6: Stop and Copy (2
@@ -317,7 +317,7 @@ For instance, the following function will cause a memory leak even if `ruby` is running. -```TODO-lang +```c void not_ok() { malloc(1024); /* receive memory and discard it */ @@ -327,14 +327,14 @@ void not_ok() However, the following function does not cause a memory leak. -```TODO-lang +```c void this_is_ok() { rb_ary_new(); /* create a ruby array and discard it */ } ``` -Since @rb_ary_new()@ uses Ruby's proper interface to allocate memory, +Since `rb_ary_new()` uses Ruby's proper interface to allocate memory, the created object is under the management of the GC of `ruby`, thus `ruby` will take care of it. @@ -343,7 +343,7 @@ thus `ruby` will take care of it. Since the substance of an object is a struct, managing objects means managing that structs. -Of course the non-pointer objects like @Fixnum Symbol nil true false@ are +Of course the non-pointer objects like `Fixnum Symbol nil true false` are exceptions, but I won't always describe about it to prevent descriptions from being redundant. @@ -356,7 +356,7 @@ The declaration of that union is as follows. ▼ `RVALUE` -```TODO-lang +```c 211 typedef struct RVALUE { 212 union { 213 struct { @@ -412,7 +412,7 @@ Hereafter, let's call this an object heap. ▼ Object heap -```TODO-lang +```c 239 #define HEAPS_INCREMENT 10 240 static RVALUE **heaps; 241 static int heaps_length = 0; @@ -425,18 +425,18 @@ Hereafter, let's call this an object heap. (gc.c) ``` -@heaps@ is an array of arrays of @struct RVALUE@. Since it is `heapS`, -the each contained array is probably each @heap@. -Each element of @heap@ is each @slot@ (Figure 9). +`heaps` is an array of arrays of `struct RVALUE`. Since it is `heapS`, +the each contained array is probably each `heap`. +Each element of `heap` is each `slot` (Figure 9).
figure 9: `heaps`, `heap`, `slot`
figure 9: `heaps`, `heap`, `slot`
-The length of @heaps@ is @heap_length@ and it can be changed. The number of -the slots actually in use is @heaps_used@. The length of each heap -is in the corresponding @heaps_limits[index]@. +The length of `heaps` is `heaps_length` and it can be changed. The number of +the slots actually in use is `heaps_used`. The length of each heap +is in the corresponding `heaps_limits[index]`. Figure 10 shows the structure of the object heap.
@@ -473,9 +473,6 @@ According to these requirements, it is good that the object heap form a structure that the addresses are cohesive to some extent and whose position and total amount are not restricted at the same time. - - - ### `freelist` @@ -486,7 +483,7 @@ The `as.free.next` of `RVALUE` is the link used for this purpose. ▼ `freelist` -```TODO-lang +```c 236 static RVALUE *freelist = 0; (gc.c) @@ -506,7 +503,7 @@ I'll show the one simplified by omitting error handlings and castings. ▼ `add_heap()` (simplified) -```TODO-lang +```c static void add_heap() { @@ -566,7 +563,7 @@ Let's confirm this by reading the `rb_newobj()` function to create an object. ▼ `rb_newobj()` -```TODO-lang +```c 297 VALUE 298 rb_newobj() 299 { @@ -612,7 +609,7 @@ and free objects that `FL_MARK` has not been set. ▼ `rb_gc_mark()` -```TODO-lang +```c 573 void 574 rb_gc_mark(ptr) 575 VALUE ptr; @@ -647,11 +644,11 @@ and free objects that `FL_MARK` has not been set. ``` -The definition of @RANY()@ is as follows. It is not particularly important. +The definition of `RANY()` is as follows. It is not particularly important. ▼ `RANY()` -```TODO-lang +```c 295 #define RANY(o) ((RVALUE*)(o)) (gc.c) @@ -663,7 +660,7 @@ checks for marked objects at the beginning, -```TODO-lang +```c obj->as.basic.flags |= FL_MARK; ``` @@ -683,8 +680,6 @@ and later it marks them once again. This code is omitted because it is not part of the main line. - - ### `rb_gc_mark_children()` @@ -696,7 +691,7 @@ Here, it is shown but the simple enumerations are omitted: ▼ `rb_gc_mark_children()` -```TODO-lang +```c 603 void 604 rb_gc_mark_children(ptr) 605 VALUE ptr; @@ -774,7 +769,7 @@ This code is extracted from the second `switch` statement. ▼ `rb_gc_mark_children()` - `T_DATA` -```TODO-lang +```c 789 case T_DATA: 790 if (obj->as.data.dmark) (*obj->as.data.dmark)(DATA_PTR(obj)); 791 break; @@ -804,7 +799,7 @@ In other words, "the roots of GC". 
▼ `rb_gc()` -```TODO-lang +```c 1110 void 1111 rb_gc() 1112 { @@ -837,7 +832,7 @@ It means that the local variables and arguments of C are automatically marked. For example, -```TODO-lang +```c static int f(void) { @@ -870,7 +865,7 @@ you don't have to think so much about it for now. ▼ Marking the Ruby Stack -```TODO-lang +```c 1130 /* mark frame stack */ 1131 for (frame = ruby_frame; frame; frame = frame->prev) { 1132 rb_gc_mark_frame(frame); @@ -901,7 +896,7 @@ Next, it marks the CPU registers. ▼ marking the registers -```TODO-lang +```c 1148 FLUSH_REGISTER_WINDOWS; 1149 /* Here, all registers must be saved into jmp_buf. */ 1150 setjmp(save_regs_gc_mark); @@ -931,7 +926,7 @@ to explicitly write out the registers.

▼ the original version of `setjmp`

-```TODO-lang +```c 1072 #ifdef __GNUC__ 1073 #if defined(__human68k__) || defined(DJGPP) 1074 #if defined(__human68k__) @@ -1000,7 +995,7 @@ it will be marked in the next code:

▼ mark the registers (shown again)

-```TODO-lang +```c 1151 mark_locations_array((VALUE*)save_regs_gc_mark, sizeof(save_regs_gc_mark) / sizeof(VALUE *)); @@ -1019,7 +1014,7 @@ I'll describe it in the next section.

▼ `mark_locations_array()`

-```TODO-lang +```c 500 static void 501 mark_locations_array(x, n) 502 register VALUE *x; @@ -1060,7 +1055,7 @@ it is `is_pointer_to_heap()`.

▼ `is_pointer_to_heap()`

-```TODO-lang +```c 480 static inline int 481 is_pointer_to_heap(ptr) 482 void *ptr; @@ -1124,7 +1119,7 @@ The content of the macro is like this:

▼ `FLUSH_REGISTER_WINDOWS`

-```TODO-lang +```c 125 #if defined(sparc) || defined(__sparc__) 126 # if defined(linux) || defined(__linux__) 127 #define FLUSH_REGISTER_WINDOWS asm("ta 0x83") @@ -1165,7 +1160,7 @@ This time, it marks `VALUES`s in the machine stack.

▼ mark the machine stack

-```TODO-lang +```c 1152 rb_gc_mark_locations(rb_gc_stack_start, (VALUE*)STACK_END); 1153 #if defined(__human68k__) 1154 rb_gc_mark_locations((VALUE*)((char*)rb_gc_stack_start + 2), @@ -1205,7 +1200,7 @@ initializing the `ruby` interpretor.

▼ `Init_stack()`

-```TODO-lang +```c 1193 void 1194 Init_stack(addr) 1195 VALUE *addr; @@ -1262,7 +1257,7 @@ Next, we'll look at the `STACK_END` which is the macro to detect the end of the

▼ `STACK_END`

-```TODO-lang +```c 345 #ifdef C_ALLOCA 346 # define SET_STACK_END VALUE stack_end; alloca(0); 347 # define STACK_END (&stack_end) @@ -1321,7 +1316,7 @@ The last one is the `rb_gc_mark_locations()` function that actually marks the st

▼ `rb_gc_mark_locations()`

-```TODO-lang +```c 513 void 514 rb_gc_mark_locations(start, end) 515 VALUE *start, *end; @@ -1361,7 +1356,7 @@ Finally, it marks the built-in `VALUE` containers of the interpretor.

▼ The other roots

-```TODO-lang +```c 1159 /* mark the registered global variables */ 1160 for (list = global_List; list; list = list->next) { 1161 rb_gc_mark(*list->varptr); @@ -1434,7 +1429,7 @@ Take a look at the next part:

▼ at the beginning of `gc_sweep()`

-```TODO-lang +```c 846 static void 847 gc_sweep() 848 { @@ -1483,7 +1478,7 @@ This hook is called "finalizer".

▼ `gc_sweep()` Middle

-```TODO-lang +```c 869 freelist = 0; 870 final_list = deferred_final_list; 871 deferred_final_list = 0; @@ -1554,7 +1549,7 @@ It means that while executing the finalizers, one cannot use the hooked objects.

▼ `gc_sweep()` the rest

-```TODO-lang +```c 910 if (final_list) { 911 RVALUE *tmp; 912 @@ -1597,7 +1592,7 @@ It's `rb_gc_force_recycle()`.

▼ `rb_gc_force_recycle()`

-```TODO-lang +```c 928 void 929 rb_gc_force_recycle(p) 930 VALUE p; @@ -1803,7 +1798,7 @@ For example, there's a possibility of disappearing in the following case: -```TODO-lang +```c VALUE str; str = rb_str_new2("..."); printf("%s\n", RSTRING(str)->ptr); @@ -1817,7 +1812,7 @@ There's no choice in this case -```TODO-lang +```c volatile VALUE str; ``` @@ -1901,7 +1896,7 @@ We've created objects many times. For example, in this way: -```TODO-lang +```ruby class C end C.new() @@ -1916,7 +1911,7 @@ First, `C.new` is actually `Class#new`. Its actual body is this:

▼ `rb_class_new_instance()`

-```TODO-lang +```c 725 VALUE 726 rb_class_new_instance(argc, argv, klass) 727 int argc; @@ -1942,7 +1937,7 @@ It is `Class#allocate` by default and its actual body is `rb_class_allocate_inst

▼ `rb_class_allocate_instance()`

-```TODO-lang +```c 708 static VALUE 709 rb_class_allocate_instance(klass) 710 VALUE klass; @@ -1980,7 +1975,7 @@ This is summarized as follows: -```TODO-lang +``` SomeClass.new = Class#new (rb_class_new_instance) SomeClass.allocate = Class#allocate (rb_class_allocate_instance) SomeClass#initialize = Object#initialize (rb_obj_dummy) @@ -2025,7 +2020,7 @@ This is how to use: -```TODO-lang +```c struct my *ptr = malloc(sizeof(struct my)); /* arbitrarily allocate in the heap */ VALUE val = Data_Wrap_Struct(data_class, mark_f, free_f, ptr); ``` @@ -2046,7 +2041,7 @@ Let's also look at the content of `Data_Wrap_Struct()`.

▼ `Data_Wrap_Struct()`

-```TODO-lang +```c 369 #define Data_Wrap_Struct(klass, mark, free, sval) \ 370 rb_data_object_alloc(klass, sval, \ (RUBY_DATA_FUNC)mark, \ @@ -2063,7 +2058,7 @@ Most of it is delegated to `rb_object_alloc()`.

▼ `rb_data_object_alloc()`

-```TODO-lang +```c 310 VALUE 311 rb_data_object_alloc(klass, datap, dmark, dfree) 312 VALUE klass; @@ -2107,7 +2102,7 @@ to do it, you can use the `Data_Get_Struct()` macro.

▼ `Data_Get_Struct()`

-```TODO-lang +```c 378 #define Data_Get_Struct(obj,type,sval) do {\ 379 Check_Type(obj, T_DATA); \ 380 sval = (type*)DATA_PTR(obj);\ @@ -2168,7 +2163,7 @@ So, -```TODO-lang +```c rb_define_allocator(rb_cMy, my_allocate); ``` diff --git a/load.md b/load.md index 8db93e1..2854897 100644 --- a/load.md +++ b/load.md @@ -4,7 +4,8 @@ title: Loading --- Translated by Vincent ISAMBART -h1(#chapter). Chapter 18: Loading +Chapter 18: Loading +=================== Outline ======= diff --git a/minimum.md b/minimum.md index 456a818..1f9970d 100644 --- a/minimum.md +++ b/minimum.md @@ -4,7 +4,8 @@ title: A Minimal Introduction to Ruby --- Translated by Sebastian Krause -h1(#chapter). Chapter 1: Introduction +Chapter 1: Introduction +======================= A Minimal Introduction to Ruby ============================== diff --git a/module.md b/module.md index b22b111..d8d2556 100644 --- a/module.md +++ b/module.md @@ -3,7 +3,8 @@ layout: default title: "Chapter 14: Context" --- -h1(#chapter). Chapter 14: Context +Chapter 14: Context +=================== The range covered by this chapter is really broad. First of all, I'll describe diff --git a/spec.md b/spec.md index fdc01b4..5f7608b 100644 --- a/spec.md +++ b/spec.md @@ -2,7 +2,9 @@ layout: default title: Ruby Language Details --- -h1(#chapter). Chapter 8 : Ruby Language Details + +Chapter 8 : Ruby Language Details +================================= I'll talk about the details of Ruby's syntax and evaluation, which haven't been covered yet. I didn't intend a complete exposition, diff --git a/syntree.md b/syntree.md index f9b87c0..7e7cd0f 100644 --- a/syntree.md +++ b/syntree.md @@ -3,7 +3,8 @@ layout: default title: "Chapter 12: Syntax tree construction" --- -h1(#chapter). 
Chapter 12: Syntax tree construction +Chapter 12: Syntax tree construction +==================================== Node ==== diff --git a/variable.md b/variable.md index c6c2b80..fd3e7e8 100644 --- a/variable.md +++ b/variable.md @@ -557,7 +557,7 @@ It's at this time that the reference counter (the `counter` member of `struct global_variable`) is necessary. I explained the general idea of a reference counter in the previous section "Garbage collection". Reviewing it briefly, when a new reference to the -structure is made, the counter in incremented by 1. When the reference +structure is made, the counter is incremented by 1. When the reference is not used anymore, the counter is decreased by 1. When the counter reaches 0, the structure is no longer useful so `free()` can be called. From b1e59e2510213b71f123af629f5d3bccd43d1be7 Mon Sep 17 00:00:00 2001 From: Ulysse Buonomo Date: Sun, 18 Apr 2021 16:02:24 +0200 Subject: [PATCH 09/14] fix headers --- anyeval.md | 8 ++++---- class.md | 12 ++++++------ contextual.md | 16 ++++++++-------- evaluator.md | 6 +++--- fin.md | 8 ++++---- gc.md | 18 +++++++++--------- intro.md | 14 +++++++------- iterator.md | 6 +++--- load.md | 8 ++++---- method.md | 6 +++--- minimum.md | 14 +++++++------- module.md | 8 ++++---- name.md | 6 +++--- object.md | 10 +++++----- parser.md | 8 ++++---- preface.md | 10 +++++----- security.md | 2 +- spec.md | 12 ++++++------ syntree.md | 8 ++++---- thread.md | 10 +++++----- variable.md | 10 +++++----- yacc.md | 10 +++++----- 22 files changed, 105 insertions(+), 105 deletions(-) diff --git a/anyeval.md b/anyeval.md index e30e0bc..67522a2 100644 --- a/anyeval.md +++ b/anyeval.md @@ -4,10 +4,10 @@ title: "Chapter 17: Dynamic evaluation" --- Chapter 17: Dynamic evaluation ------------------------------- +============================== Overview -======== +-------- I have already finished to describe about the mechanism of the evaluator by the previous chapter. @@ -159,7 +159,7 @@ compiled when loading files. 
`eval` -====== +------ ### `eval()` @@ -541,7 +541,7 @@ when it reaches the link created at the evaluator (`vars`). `instance_eval` -=============== +--------------- ### The Whole Picture diff --git a/class.md b/class.md index 67ec16a..996a176 100644 --- a/class.md +++ b/class.md @@ -6,13 +6,13 @@ title: Classes and modules Translated by Vincent ISAMBART Chapter 4: Classes and modules ------------------------------- +============================== In this chapter, we'll see the details of the data structures created by classes and modules. Classes and methods definition -============================== +------------------------------ First, I'd like to have a look at how Ruby classes are defined at the C level. This chapter investigates almost only particular @@ -281,7 +281,7 @@ provided with `ruby`, that is to say not from a built-in library. ``` Singleton classes -================= +----------------- ### `rb_define_singleton_method()` @@ -759,7 +759,7 @@ What are singleton methods? They are methods defined in the singleton class of an object. Metaclasses -=========== +----------- ### Inheritance of singleton methods @@ -1173,7 +1173,7 @@ like figure 10.
Class names -=========== +----------- In this section, we will analyse how's formed the reciprocal conversion between class and class names, in other words @@ -1445,7 +1445,7 @@ space. However, generally, there aren't many constants so even searching all constants does not take too much time. Include -======= +------- We only talked about classes so let's finish this chapter with something else and talk about module inclusion. diff --git a/contextual.md b/contextual.md index d7e3afb..2567dcf 100644 --- a/contextual.md +++ b/contextual.md @@ -10,10 +10,10 @@ more effort in this translation than I could ever wish for. Without them, I would be still figuring out what `COND_LEXPOP()` actually does._ Chapter 11 Finite-state scanner -------------------------------- +=============================== Outline -======= +------- In theory, the scanner and the parser are completely independent of each other – the scanner is supposed to recognize tokens, while the parser is supposed to @@ -275,7 +275,7 @@ only appears in a limited number of places, not warranting any special attention. Line-break handling -=================== +------------------- ### The problem @@ -423,7 +423,7 @@ Note that `class` becomes `tIDENTIFIER` despite being a reserved word. This is discussed in the next section. Reserved words and identical method names -========================================= +----------------------------------------- ### The problem @@ -585,7 +585,7 @@ danger to parsing here, but if this is forgotten, the scanner will not pass values to reserved words and value calculation will be disrupted. Modifiers -========= +--------- ### The problem @@ -726,7 +726,7 @@ if rescue unless until while ``` The `do` conflict -================= +----------------- ### The problem @@ -986,7 +986,7 @@ and `LEXPOP`. Basically, at this time it would be correct to say that `COND_LEXPOP()` has no meaning. 
`tLPAREN_ARG`(1) -================ +---------------- ### The problem @@ -1343,7 +1343,7 @@ passing of arrays or passing of blocks. With this, the scope is now sufficiently broad. `tLPAREN_ARG`(2) -================ +---------------- ### The problem diff --git a/evaluator.md b/evaluator.md index 02f4d95..636cd63 100644 --- a/evaluator.md +++ b/evaluator.md @@ -7,7 +7,7 @@ Chapter 13: Structure of the evaluator ====================================== Outline -======= +------- ### Interface @@ -461,7 +461,7 @@ This is also not important, so we won't see this. `rb_eval()` -=========== +----------- ### Outline @@ -1275,7 +1275,7 @@ Fig.6: Transferring the return value Exception -========= +--------- As the second example of the usage of "tag jump", we'll look at how exceptions diff --git a/fin.md b/fin.md index 9b22f68..bf71f65 100644 --- a/fin.md +++ b/fin.md @@ -3,10 +3,10 @@ layout: default --- Final Chapter: Ruby's future ----------------------------- +============================ Issues to be addressed -====================== +---------------------- `ruby` isn't 'completely finished' software. It's still being developed, there are still a lot of issues. Firstly, we want to try removing @@ -167,7 +167,7 @@ cannot continue to use eternally, isn't it? `ruby` 2 -======== +-------- Subsequently, on the other hand, I'll introduce the trend of the original `ruby`, how it is trying to counter these issues. @@ -344,7 +344,7 @@ Therefore, it seems Rite will have its own `stdio`. Ruby Hacking Guide -================== +------------------ So far, we've always acted as observers who look at `ruby` from outside. 
diff --git a/gc.md b/gc.md index 093c59d..8a24829 100644 --- a/gc.md +++ b/gc.md @@ -5,10 +5,10 @@ title: Garbage Collection Translated by Sebastian Krause & ocha- Chapter 5: Garbage Collection ------------------------------ +============================= A conception of an executing program -==================================== +------------------------------------ It's all of a sudden but at the beginning of this chapter, we'll learn about the memory space of an executing program. In this chapter @@ -96,7 +96,7 @@ The `missing/alloca.c` of `ruby` is an example of an emulated `alloca()` . Overview -======== +-------- From here on we can at last talk about the main subject of this chapter: garbage collection. @@ -306,7 +306,7 @@ but because it sometimes invokes mark and sweep GC to check. Object Management -================= +----------------- Ruby's garbage collection is only concerned with ruby objects. Moreover, it only concerned with the objects created and managed by `ruby`. @@ -589,7 +589,7 @@ And take a struct from `freelist`, zerofill it by `MEMZERO()`, and return it. Mark -==== +---- As described, `ruby`'s GC is Mark & Sweep. @@ -1416,7 +1416,7 @@ Until here, the mark phase has been finished. Sweep -===== +----- ### The special treatment for `NODE` @@ -1614,7 +1614,7 @@ several times in Part 2 and Part 3. Discussions -=========== +----------- ### To free spaces @@ -1835,7 +1835,7 @@ but it seems it could not be applied to `ruby` because its algorithm has a hole. When to invoke -============== +-------------- ### Inside `gc.c` @@ -1879,7 +1879,7 @@ that `NODE` cannot be garbage collected while compiling. 
Object Creation -=============== +--------------- We've finished about GC and come to be able to deal with the Ruby objects from diff --git a/intro.md b/intro.md index d5511a2..d813cf7 100644 --- a/intro.md +++ b/intro.md @@ -4,10 +4,10 @@ title: Introduction --- Introduction ------------- +============ Characteristics of Ruby -======================= +----------------------- Some of the readers may have already been familiar with Ruby, but (I hope) there are also many readers who have not. First let's go though a @@ -356,7 +356,7 @@ It's, say, probably the slowest of all user-level thread implementations in this The tendency of `ruby` implementation may be seen here the most clearly. Technique to read source code -============================= +----------------------------- Well. After an introduction of `ruby`, we are about to start reading source code. But wait. @@ -643,7 +643,7 @@ features. * function cross-reference Build -===== +----- ### Target version @@ -853,7 +853,7 @@ would be fairly long, so I'll explain it comprehensively in `doc/build.html` of the attached CD-ROM. Building Details -================ +---------------- Until here, it has been the `README`-like description. This time, let's look at exactly what is done by what we have been done. @@ -1007,7 +1007,7 @@ libraries, but the extension libraries cannot be compiled without `ruby`. In order to resolve this dilemma, it uses `miniruby`. `CVS` -===== +----- The `ruby` archive included in the attached CD-ROM is, as the same as the official release package, @@ -1131,7 +1131,7 @@ Regarding books which you can read in Japanese, I recommend translated "Open Source Development with CVS" Karl Fogel, Moshe Bar. 
The composition of `ruby` -========================= +------------------------- ### The physical structure diff --git a/iterator.md b/iterator.md index fcea399..c083628 100644 --- a/iterator.md +++ b/iterator.md @@ -4,10 +4,10 @@ title: "Chapter 16: Blocks" --- Chapter 16: Blocks ------------------- +================== Iterator -======== +-------- In this chapter, `BLOCK`, which is the last big name among the seven Ruby stacks, @@ -890,7 +890,7 @@ Hence, we need to check the previous one. `Proc` -====== +------ To describe a `Proc` object from the viewpoint of implementing, diff --git a/load.md b/load.md index 2854897..c9e1770 100644 --- a/load.md +++ b/load.md @@ -8,7 +8,7 @@ Chapter 18: Loading =================== Outline -======= +------- ### Interface @@ -203,7 +203,7 @@ referred to as plug-ins, works. This is the most interesting part of this chapter, so I'd like to use as many pages as possible to talk about it. Searching the library -===================== +--------------------- ### `rb_f_require()` @@ -455,7 +455,7 @@ Figure 1: Serialisation of loads Loading of Ruby programs -======================== +------------------------ ### `rb_load()` @@ -698,7 +698,7 @@ dynamic links, and there are the other four `open`s. Thus it seems the three of them are useless. Loading of extension libraries -============================== +------------------------------ ### `rb_f_require()`-`load_dyna` diff --git a/method.md b/method.md index 45838cd..ad7b0ea 100644 --- a/method.md +++ b/method.md @@ -4,13 +4,13 @@ title: "Chapter 15: Methods" --- Chapter 15: Methods -------------------- +=================== In this chapter, I'll talk about method searching and invoking. Searching methods -================= +----------------- ### Terminology @@ -418,7 +418,7 @@ This is awesome. Apparently, the effect of "it is know as ..." is outstanding. 
Invocation -========== +---------- ### `rb_call0()` diff --git a/minimum.md b/minimum.md index 1f9970d..e200b0a 100644 --- a/minimum.md +++ b/minimum.md @@ -8,7 +8,7 @@ Chapter 1: Introduction ======================= A Minimal Introduction to Ruby -============================== +------------------------------ Here the Ruby prerequisites are explained, which one needs to know in order to understand the first section. @@ -24,7 +24,7 @@ I won't omit things even if I can. This way the syntax becomes more simple. I won't always say "We can omit this". Objects -======= +------- ### Strings @@ -149,7 +149,7 @@ Here the method `downcase` is called on the return value of `"content".upcase()` There are no public fields (member variables) as in Java or C++. The object interface consists of methods only. The Program -=========== +----------- ### Top Level @@ -329,7 +329,7 @@ It wouldn't be wise if there were just `false`, there is also `true`. And it is of course true. Classes and Methods -=================== +------------------- ### Classes @@ -927,7 +927,7 @@ it would look like Fig.9. The Program revisited -===================== +--------------------- Caution. This section is extremely important and explaining the elements which are not easy to mix with for programmers who have only used static languages before. @@ -1201,7 +1201,7 @@ end ``` More about Classes -================== +------------------ ### The talk about Constants still goes on @@ -1516,7 +1516,7 @@ B.new().print_cvar() # Shows "ok" ``` Global Variables -================ +---------------- At last there are also global variables. They can be referenced from everywhere and assigned everywhere. The first letter of the name is a `$`. diff --git a/module.md b/module.md index d8d2556..23eb583 100644 --- a/module.md +++ b/module.md @@ -18,7 +18,7 @@ behaviors of the variable definitions and the variable references. 
The Ruby stack -============== +-------------- ### Context and Stack @@ -512,7 +512,7 @@ are not stacks. Actually, most of them are not. Module Definition -================= +----------------- The @class@ statement and the @module@ statement and the singleton class @@ -1240,7 +1240,7 @@ of "pushed". The method definition -===================== +--------------------- As the next topic of the module definitions, let's look at the method definitions. @@ -1518,7 +1518,7 @@ this time. It is used when having to do with @alias@. Assignment and Reference -======================== +------------------------ Come to think of it, most of the stacks are used to realize a variety of diff --git a/name.md b/name.md index ed5a966..ee33a54 100644 --- a/name.md +++ b/name.md @@ -6,10 +6,10 @@ title: Names and Name Table Translated by Clifford Escobar CAOILE Chapter 3: Names and Name Table -------------------------------- +=============================== `st_table` -========== +---------- `st_table` has already appeared several times as a method table and an instance table. In this chapter let's look at the structure of the `st_table` in detail. @@ -540,7 +540,7 @@ not registered will it be added. If there is a insertion, return 0. If there is no insertion, return a 1. `ID` and Symbols -================ +---------------- I've already discussed what an `ID` is. It is a correspondence between an arbitrary string of characters and a value. It is used to declare various diff --git a/object.md b/object.md index 4ec5a20..361ddda 100644 --- a/object.md +++ b/object.md @@ -5,10 +5,10 @@ title: Objects - Structure of Ruby objects Translated by Vincent ISAMBART Chapter 2: Objects ------------------- +================== Structure of Ruby objects -========================= +------------------------- ### Guideline @@ -446,7 +446,7 @@ This value is used to express an undefined value in the interpreter. It can't (must not) be found at all at the Ruby level. 
Methods -======= +------- I already brought up the three important points of a Ruby object: having an identity, being able to call a method, and keeping data for each instance. In @@ -557,7 +557,7 @@ from the second time it will be found without following `super` one by one. This cache and its search will be seen in the 15th chapter "Methods." Instance variables -================== +------------------ In this section, I will explain the implementation of the third essential condition, instance variables. @@ -899,7 +899,7 @@ Thus `FL_xxxx()` is designed to always return false for such object. Hence, objects that are `rb_special_const_p()` should be treated specially here. Object Structs -============== +-------------- In this section, about the important ones among object structs, we'll briefly see their concrete appearances and how to deal with them. diff --git a/parser.md b/parser.md index 707d0d4..0d36983 100644 --- a/parser.md +++ b/parser.md @@ -5,10 +5,10 @@ title: Parser Translated by Robert GRAVINA & ocha- Chapter 10: Parser ------------------- +================== Outline of this chapter -======================= +----------------------- ### Parser construction @@ -78,7 +78,7 @@ explained in this book. |`ID` implementation|Chapter 3 "Names and name tables"|Section 2 "`ID` and symbols"| General remarks about grammar rules -=================================== +----------------------------------- ### Coding rules @@ -1003,7 +1003,7 @@ That's why `yacc` could not be used for ordinary HTML at all. Scanner -======= +------- ### Parser Outline diff --git a/preface.md b/preface.md index 5e98bde..79c902e 100644 --- a/preface.md +++ b/preface.md @@ -74,7 +74,7 @@ the book that lets you practice such idealism exhaustively. who think so will increase because of this book. Target audience -=============== +--------------- Firstly, knowledge about the Ruby language isn't required. 
However, since the knowledge of the Ruby language is absolutely necessary to understand certain @@ -91,7 +91,7 @@ object-oriented languages, you will probably have a difficult time. In this book, I tried to use many examples in Java and C++. Structure of this book -====================== +---------------------- This book has four main parts: @@ -140,7 +140,7 @@ parentheses after the explanation indicates the difficulty gauge. They are ==(C) * Chapter19 — Describes the implementation of thread at the end of the core part. (A) Environment -=========== +----------- This book describes on `ruby` 1.7.3 2002-09-12 version. It's attached on the CD-ROM. Choose any one of `ruby-rhg.tar.gz`, `ruby-rhg.lzh`, or `ruby-rhg.zip` @@ -213,7 +213,7 @@ attempting to contact these people directly. If there's any flaw in execution, please be advised to contact the author by e-mail: `aamine@loveruby.net`. Web site -======== +-------- The web site for this book is `http://i.loveruby.net/ja/rhg/`. I will add information about related programs and additional documentation, as @@ -223,7 +223,7 @@ circumstance to publicize more chapters, and the whole contents of the book will be at this website at the end. Acknowledgment -============== +-------------- First of all, I would like to thank Mr. Yukihiro Matsumoto. He is the author of Ruby, and he made it in public as an open source software. Not only he diff --git a/security.md b/security.md index 6005f30..9e41d5f 100644 --- a/security.md +++ b/security.md @@ -5,7 +5,7 @@ title: Security Translated by Clifford Escobar CAOILE & ocha- Chapter 7: Security -------------------- +=================== ### Fundamentals diff --git a/spec.md b/spec.md index 5f7608b..6a8da40 100644 --- a/spec.md +++ b/spec.md @@ -16,7 +16,7 @@ reading this. A complete exposition can be found in the Readers who know Ruby can skip over this chapter. Literals -======== +-------- The expressiveness of Ruby's literals is extremely high. 
In my opinion, what makes Ruby a script language @@ -487,7 +487,7 @@ From here on in this book, we'll completely forget about numerical values. Methods -======= +------- Let's talk about the definition and calling of methods. @@ -963,7 +963,7 @@ Hence such method is very likely a method in which only procedures are written (like `sin`). That's why they are called module "functions". Iterators -========= +--------- Ruby's iterators differ a bit from Java's or C++'s iterator classes or 'Iterator' design pattern. Precisely speaking, those iterators @@ -1303,7 +1303,7 @@ end ``` Expressions -=========== +----------- "Expressions" in Ruby are things with which we can create other expressions or statements by combining with the others. @@ -1792,7 +1792,7 @@ I would have loved to tell you more about @defined?@ but it will not appear again in this book. What a pity. Statements -========== +---------- A statement is what basically cannot be combined with the other syntaxes, in other words, they are lined vertically. @@ -2107,7 +2107,7 @@ It's because the sign put up by `undef` prohibits any kind of searches. ((errata: It can be redefined by using `def`)) Some more small topics -====================== +---------------------- ### Comments diff --git a/syntree.md b/syntree.md index 7e7cd0f..8624bf5 100644 --- a/syntree.md +++ b/syntree.md @@ -7,7 +7,7 @@ Chapter 12: Syntax tree construction ==================================== Node -==== +---- ### @NODE@ @@ -354,7 +354,7 @@ a struct type that has the above seven members. Syntax Tree Construction -======================== +------------------------ The role of the parser is to convert the source code that is a byte sequence to a @@ -1059,7 +1059,7 @@ I'd like you to recall this and think "Oh, this uses the length". 
Semantic Analysis -================= +----------------- As I briefly mentioned at the beginning of Part 2, there are two types of analysis @@ -1411,7 +1411,7 @@ Fig.7: the call graph of the value check functions Local Variables -=============== +--------------- ### Local Variable Definitions diff --git a/thread.md b/thread.md index 873c547..f6ca202 100644 --- a/thread.md +++ b/thread.md @@ -3,10 +3,10 @@ layout: default title: "Chapter 19: Threads" --- Chapter 19: Threads -------------------- +=================== Outline -======= +------- ### Ruby Interface @@ -327,7 +327,7 @@ respectively. Trigger -======= +------- To begin with, it's the first point, when to switch threads. @@ -699,7 +699,7 @@ after `THREAD_TICK` (=500) times going through `CHECK_INTS`. Scheduling -========== +---------- The second point is to which thread to switch. @@ -1146,7 +1146,7 @@ I'll start a new section. Context Switch -============== +-------------- The last third point is thread-switch, diff --git a/variable.md b/variable.md index fd3e7e8..bf7c22e 100644 --- a/variable.md +++ b/variable.md @@ -6,10 +6,10 @@ title: Variables and constants Translated by Vincent ISAMBART Chapter 6: Variables and constants ----------------------------------- +================================== Outline of this chapter -======================= +----------------------- ### Ruby variables @@ -99,7 +99,7 @@ like "It behaves like this in this situation so its implementation couldn't be other then this!" Class variables -=============== +--------------- Class variables are variables that belong to classes. In Java or C++ they are called static variables. They can be accessed from both the @@ -169,7 +169,7 @@ level, the variable type is determined only by the prefix so there's no way to access a class variable called `@var` from Ruby. Constants -========= +--------- It's a little abrupt but I'd like you to remember the members of `struct RClass`. 
If we exclude the `basic` member, `struct RClass` @@ -438,7 +438,7 @@ evaluator is handled. Specifically, this search in other classes is done in the with the constants in the third part of the book. Global variables -================ +---------------- ### General remarks diff --git a/yacc.md b/yacc.md index 0970677..59c85b6 100644 --- a/yacc.md +++ b/yacc.md @@ -5,10 +5,10 @@ title: YACC crash course Translated by Vincent ISAMBART & ocha- Chapter 9: `yacc` crash course ------------------------------- +============================== Outline -======= +------- ### Parser and scanner @@ -109,7 +109,7 @@ book" because of the dragon on its cover) by Alfred V. Aho, Ravi Sethi and Jeffrey D. Ullman. Grammar -======= +------- ### Grammar file @@ -496,7 +496,7 @@ list: ``` Construction of values -====================== +---------------------- This abstract talk lasted long enough so in this section I'd really like to go on with a more concrete talk. @@ -900,7 +900,7 @@ the value of the embedded action will come out as `$3`. Practical Topics -================ +---------------- ### Conflicts From 3885b529c9f64f056cb5fbed8ee903e66927b249 Mon Sep 17 00:00:00 2001 From: Ulysse Buonomo Date: Sun, 18 Apr 2021 16:29:27 +0200 Subject: [PATCH 10/14] fix all @ instead of backticks --- module.md | 737 +++++++++++++++++++++++++++-------------------------- spec.md | 192 +++++++------- syntree.md | 736 ++++++++++++++++++++++++++-------------------------- 3 files changed, 837 insertions(+), 828 deletions(-) diff --git a/module.md b/module.md index 23eb583..3eb464d 100644 --- a/module.md +++ b/module.md @@ -36,8 +36,8 @@ is a perfect example. What to be careful about here is, what is changing during the execution is only the stack, on the contrary, the program remains unchanged wherever it is. 
-For example, if it is "a reference to the local variable @i@", there's just an -order of "give me @i@ of the current frame", it is not written as "give me @i@ +For example, if it is "a reference to the local variable `i`", there's just an +order of "give me `i` of the current frame", it is not written as "give me `i` of that frame". In other words, "only" the state of the stack influences the consequence. This is why, even if a procedure is called anytime and any number of times, @@ -58,7 +58,7 @@ And these kind of scopes are expressed by stacks. However in Ruby, for instance, you can temporarily go back to the scope -previously used by using iterators or @Proc@. +previously used by using iterators or `Proc`. This cannot be implemented with just simply pushing/popping a stack. Therefore the frames of the Ruby stack will be intricately rearranged during execution. Although I call it "stack", it could be better to consider it @@ -72,13 +72,14 @@ separately. For these various reasons, surprisingly, there are seven stacks. -|_. Stack Pointer |_. Stack Frame Type |_. 
Description | -| @ruby_frame@ | @struct FRAME@ | the records of method calls | -| @ruby_scope@ | @struct SCOPE@ | the local variable scope | -| @ruby_block@ | @struct BLOCK@ | the block scope | -| @ruby_iter@ | @struct iter@ | whether or not the current @FRAME@ is an iterator | -| @ruby_class@ | @VALUE@ | the class to define methods on | -| @ruby_cref@ | @NODE@ ( @NODE_CREF@ ) | the class nesting information | +| Stack Pointer | Stack Frame Type | Description | +| ------------- | -------------------- | ------------------------------------------------- | +| `ruby_frame` | `struct FRAME` | the records of method calls | +| `ruby_scope` | `struct SCOPE` | the local variable scope | +| `ruby_block` | `struct BLOCK` | the block scope | +| `ruby_iter` | `struct iter` | whether or not the current `FRAME` is an iterator | +| `ruby_class` | `VALUE` | the class to define methods on | +| `ruby_cref` | `NODE` (`NODE_CREF`) | the class nesting information | C has only one stack and Ruby has seven stacks, by simple arithmetic, the @@ -88,13 +89,13 @@ it's at least twenty times more complicated. First, I'll briefly describe about these stacks and their stack frame structs. -The defined file is either @eval.c@ or @evn.h@. Basically these stack frames -are touched only by @eval.c@ ... is what it should be if it were possible, -but @gc.c@ needs to know the struct types when marking, -so some of them are exposed in @env.h@. +The defined file is either `eval.c` or `env.h`. Basically these stack frames +are touched only by `eval.c` ... is what it should be if it were possible, +but `gc.c` needs to know the struct types when marking, +so some of them are exposed in `env.h`. -Of course, marking could be done in the other file but @gc.c@, but it requires +Of course, marking could be done in the other file but `gc.c`, but it requires separated functions which cause slowing down. 
The ordinary programs had better not care about such things, but both the garbage collector and the core of the evaluator is the ruby's biggest bottleneck, so it's quite worth to optimize @@ -103,16 +104,16 @@ even for just one method call. -### @ruby_frame@ +### `ruby_frame` -@ruby_frame@ is a stack to record method calls. The stack frame struct is -@struct FRAME@. This terminology is a bit confusing but please be aware that +`ruby_frame` is a stack to record method calls. The stack frame struct is +`struct FRAME`. This terminology is a bit confusing but please be aware that I'll distinctively write it just a frame when it means a "stack frame" as a -general noun and @FRAME@ when it means @struct FRAME@. +general noun and `FRAME` when it means `struct FRAME`. -

▼ @ruby_frame@

+

▼ `ruby_frame`

```TODO-lang 16 extern struct FRAME { @@ -137,45 +138,45 @@ general noun and @FRAME@ when it means @struct FRAME@. ``` -First af all, since there's the @prev@ member, you can infer that the stack is +First of all, since there's the `prev` member, you can infer that the stack is made of a linked list. (Fig.2)

(framestack)
-Fig.2: @ruby_frame@ +Fig.2: `ruby_frame`

-The fact that @ruby_xxxx@ points to the top stack frame is common to all stacks +The fact that `ruby_xxxx` points to the top stack frame is common to all stacks and won't be mentioned every time. -The first member of the struct is @self@. -There is also @self@ in the arguments of @rb_eval()@, -but why this struct remembers another @self@ ? -This is for the C-level functions. More precisely, it's for @rb_call_super()@ that is -corresponding to @super@. In order to execute @super@, it requires the receiver -of the current method, but the caller side of @rb_call_super()@ could not have -such information. However, the chain of @rb_eval()@ is interrupted before the +The first member of the struct is `self`. +There is also `self` in the arguments of `rb_eval()`, +but why this struct remembers another `self` ? +This is for the C-level functions. More precisely, it's for `rb_call_super()` that is +corresponding to `super`. In order to execute `super`, it requires the receiver +of the current method, but the caller side of `rb_call_super()` could not have +such information. However, the chain of `rb_eval()` is interrupted before the time when the execution of the user-defined C code starts. Therefore, the -conclusion is that there need a way to obtain the information of @self@ out of -nothing. And, @FRAME@ is the right place to store it. +conclusion is that there need a way to obtain the information of `self` out of +nothing. And, `FRAME` is the right place to store it. -Thinking a little further, It's mysterious that there are @argc@ and @argv@. +Thinking a little further, It's mysterious that there are `argc` and `argv`. Because parameter variables are local variables after all, it is unnecessary to preserve the given arguments after assigning them into the local variable with the same names at the beginning of the method, isn't it? Then, what is the use of them ? The answer is that this is actually for -@super@ again. 
In Ruby, when calling @super@ without any arguments, the values of +`super` again. In Ruby, when calling `super` without any arguments, the values of the parameter variables of the method will be passed to the method of the superclass. Thus, (the local variable space for) the parameter variables must be reserved. -Additionally, the difference between @last_func@ and @orig_func@ will come -out in the cases like when the method is @alias@ ed. +Additionally, the difference between `last_func` and `orig_func` will come +out in the cases like when the method is `alias` ed. For instance, @@ -189,23 +190,23 @@ C.new.ali ``` -in this case, @last_func=ali@ and @orig_func=orig@. -Not surprisingly, these members also have to do with @super@. +in this case, `last_func=ali` and `orig_func=orig`. +Not surprisingly, these members also have to do with `super`. -### @ruby_scope@ +### `ruby_scope` -@ruby_scope@ is the stack to represent the local variable scope. The method and +`ruby_scope` is the stack to represent the local variable scope. The method and class definition statements, the module definition statements and the singleton class definition statements, all of them are different scopes. The stack frame -struct is @struct SCOPE@. -I'll call this frame @SCOPE@. +struct is `struct SCOPE`. +I'll call this frame `SCOPE`. -

▼ @ruby_scope@

+

▼ `ruby_scope`

```TODO-lang 36 extern struct SCOPE { @@ -224,8 +225,8 @@ I'll call this frame @SCOPE@. ``` -Since the first element is @struct RBasic@, this is a Ruby object. This is in -order to handle @Proc@ objects. For example, let's try to think about the case +Since the first element is `struct RBasic`, this is a Ruby object. This is in +order to handle `Proc` objects. For example, let's try to think about the case like this: @@ -244,14 +245,14 @@ cnt = nil # cut the reference. The created Proc finally becomes unnecessary her ``` -The @Proc@ object created by this method will persist longer than the method that -creates it. And, because the @Proc@ can refer to the local variable @lvar@, -the local variables must be preserved until the @Proc@ will disappear. +The `Proc` object created by this method will persist longer than the method that +creates it. And, because the `Proc` can refer to the local variable `lvar`, +the local variables must be preserved until the `Proc` will disappear. Thus, if it were not handled by the garbage collector, no one can determine the time to free. -There are two reasons why @struct SCOPE@ is separated from @struct FRAME@. +There are two reasons why `struct SCOPE` is separated from `struct FRAME`. Firstly, the things like class definition statements are not method calls but create distinct local variable scopes. Secondly, when a called method is defined in C the Ruby's local @@ -260,16 +261,16 @@ variable space is unnecessary. -### @ruby_block@ +### `ruby_block` -@struct BLOCK@ is the real body of a Ruby's iterator block or a @Proc@ object, +`struct BLOCK` is the real body of a Ruby's iterator block or a `Proc` object, it is also kind of a snapshot of the evaluator at some point. -This frame will also be briefly written as @BLOCK@ as in the same manner as -@FRAME@ and @SCOPE@. +This frame will also be briefly written as `BLOCK` as in the same manner as +`FRAME` and `SCOPE`. -

▼ @ruby_block@

+

▼ `ruby_block`

```TODO-lang 580 static struct BLOCK *ruby_block; @@ -306,29 +307,29 @@ This frame will also be briefly written as @BLOCK@ as in the same manner as ``` -Note that @frame@ is not a pointer. This is because the entire content of -@struct FRAME@ will be all copied and preserved. The entire @struct FRAME@ is -(for better performance) allocated on the machine stack, but @BLOCK@ could -persist longer than the @FRAME@ that creates it, the preservation is a +Note that `frame` is not a pointer. This is because the entire content of +`struct FRAME` will be all copied and preserved. The entire `struct FRAME` is +(for better performance) allocated on the machine stack, but `BLOCK` could +persist longer than the `FRAME` that creates it, the preservation is a preparation for that case. -Additionally, @struct BLOCKTAG@ is separated in order to detect the same block -when multiple @Proc@ objects are created from the block. The @Proc@ objects -which were created from the one same block have the same @BLOCKTAG@. +Additionally, `struct BLOCKTAG` is separated in order to detect the same block +when multiple `Proc` objects are created from the block. The `Proc` objects +which were created from the one same block have the same `BLOCKTAG`. -### @ruby_iter@ +### `ruby_iter` -The stack @ruby_iter@ indicates whether currently calling method is an iterator -(whether it is called with a block). The frame is @struct iter@. -But for consistency I'll call it @ITER@. +The stack `ruby_iter` indicates whether currently calling method is an iterator +(whether it is called with a block). The frame is `struct iter`. +But for consistency I'll call it `ITER`. -

▼ @ruby_iter@

+

▼ `ruby_iter`

```TODO-lang 767 static struct iter *ruby_iter; @@ -346,28 +347,28 @@ But for consistency I'll call it @ITER@. Although for each method we can determine whether it is an iterator or not, -there's another struct that is distinct from @struct FRAME@. Why? +there's another struct that is distinct from `struct FRAME`. Why? It's obvious you need to inform it to the method when "it is an iterator", but you also need to inform the fact when "it is not an iterator". -However, pushing a whole @BLOCK@ just for this is very heavy. It will also +However, pushing a whole `BLOCK` just for this is very heavy. It will also cause that in the caller side the procedures such as variable references would needlessly increase. -Thus, it's better to push the smaller and lighter @ITER@ instead of @BLOCK@. +Thus, it's better to push the smaller and lighter `ITER` instead of `BLOCK`. This will be discussed in detail in Chapter 16: Blocks. -### @ruby_dyna_vars@ +### `ruby_dyna_vars` -The block local variable space. The frame struct is @struct RVarmap@ that has -already seen in Part 2. From now on, I'll call it just @VARS@. +The block local variable space. The frame struct is `struct RVarmap` that has +already seen in Part 2. From now on, I'll call it just `VARS`. -

▼ @struct RVarmap@

+

▼ `struct RVarmap`

```TODO-lang 52 struct RVarmap { @@ -381,53 +382,53 @@ already seen in Part 2. From now on, I'll call it just @VARS@. ``` -Note that a frame is not a single @struct RVarmap@ but a list of the structs (Fig.3). +Note that a frame is not a single `struct RVarmap` but a list of the structs (Fig.3). And each frame is corresponding to a local variable scope. Since it corresponds to "local variable scope" and not "block local variable scope", for instance, even if blocks are nested, only a single list is used to express. The break between blocks are similar to the one of the parser, -it is expressed by a @RVarmap@ (header) whose @id@ is @0@. +it is expressed by a `RVarmap` (header) whose `id` is `0`. Details are deferred again. It will be explained in Chapter 16: Blocks.

(vars)
-Fig.3: @ruby_dyna_vars@ +Fig.3: `ruby_dyna_vars`

-### @ruby_class@ +### `ruby_class` -@ruby_class@ represents the current class to which a method is defined. Since -@self@ will be that class when it's a normal class definition statement, -@ruby_class == self@. But, when it is the top level or in the middle of -particular methods like @eval@ and @instance_eval@, @self != ruby_class@ is +`ruby_class` represents the current class to which a method is defined. Since +`self` will be that class when it's a normal class definition statement, +`ruby_class == self`. But, when it is the top level or in the middle of +particular methods like `eval` and `instance_eval`, `self != ruby_class` is possible. -The frame of @ruby_class@ is a simple @VALUE@ and there's no particular frame +The frame of `ruby_class` is a simple `VALUE` and there's no particular frame struct. Then, how could it be like a stack? Moreover, there were many structs -without the @prev@ pointer, how could these form a stack? The answer is deferred +without the `prev` pointer, how could these form a stack? The answer is deferred to the next section. -From now on, I'll call this frame @CLASS@. +From now on, I'll call this frame `CLASS`. -### @ruby_cref@ +### `ruby_cref` -@ruby_cref@ represents the information of the nesting of a class. -I'll call this frame @CREF@ with the same way of naming as before. +`ruby_cref` represents the information of the nesting of a class. +I'll call this frame `CREF` with the same way of naming as before. Its struct is ... -

▼ @ruby_cref@

+

▼ `ruby_cref`

```TODO-lang 847 static NODE *ruby_cref = 0; @@ -436,19 +437,20 @@ Its struct is ... ``` -... surprisingly @NODE@. This is used just as a "defined struct which can be -pointed by a @VALUE@". The node type is @NODE_CREF@ and the assignments of its +... surprisingly `NODE`. This is used just as a "defined struct which can be +pointed by a `VALUE`". The node type is `NODE_CREF` and the assignments of its members are shown below: -|_. Union Member |_. Macro To Access |_. Usage | -| u1.value | nd_clss | the outer class ( @VALUE@ ) | -| u2 | - | - | -| u3.node | nd_next | preserve the previous @CREF@ | +| Union Member | Macro To Access | Usage | +| ------------ | --------------- | ---------------------------- | +| u1.value | nd_clss | the outer class (`VALUE`) | +| u2 | - | - | +| u3.node | nd_next | preserve the previous `CREF` | -Even though the member name is @nd_next@, the value it actually has is the -"previous (prev)" @CREF@. Taking the following program as an example, I'll +Even though the member name is `nd_next`, the value it actually has is the +"previous (prev)" `CREF`. Taking the following program as an example, I'll explain the actual appearance. @@ -464,11 +466,11 @@ end ``` -Fig.4 shows how @ruby_cref@ is when evaluating the code (A). +Fig.4 shows how `ruby_cref` is when evaluating the code (A).

(crefstack)
-Fig.4: @ruby_cref@ +Fig.4: `ruby_cref`

@@ -482,10 +484,10 @@ A ← B ← C ``` -### @PUSH@ / @POP@ Macros +### `PUSH` / `POP` Macros For each stack frame struct, the macros to push and pop are available. -For instance, @PUSH_FRAME@ and @POP_FRAME@ for @FRAME@. +For instance, `PUSH_FRAME` and `POP_FRAME` for `FRAME`. Because these will appear in a moment, I'll then explain the usage and content. @@ -493,18 +495,19 @@ I'll then explain the usage and content. ### The other states -While they are not so important as the main stacks, the evaluator of @ruby@ has +While they are not so important as the main stacks, the evaluator of `ruby` has the several other states. This is a brief list of them. However, some of them are not stacks. Actually, most of them are not. -|_. Variable Name |_. Type |_. Meaning | -| @scope_vmode@ | @int@ | the default visibility when a method is defined | -| @ruby_in_eval@ | @int@ | whether or not parsing after the evaluation is started | -| @ruby_current_node@ | @NODE*@ | the file name and the line number of what currently being evaluated | -| @ruby_safe_level@ | @int@ | @$SAFE@ | -| @ruby_errinfo@ | @VALUE@ | the exception currently being handled | -| @ruby_wrapper@ | @VALUE@ | the wrapper module to isolate the environment | +| Variable Name | Type | Meaning | +| ------------------- | ------- | ------------------------------------------------------------------- | +| `scope_vmode` | `int` | the default visibility when a method is defined | +| `ruby_in_eval` | `int` | whether or not parsing after the evaluation is started | +| `ruby_current_node` | `NODE*` | the file name and the line number of what currently being evaluated | +| `ruby_safe_level` | `int` | `$SAFE` | +| `ruby_errinfo` | `VALUE` | the exception currently being handled | +| `ruby_wrapper` | `VALUE` | the wrapper module to isolate the environment | @@ -515,24 +518,24 @@ Module Definition ----------------- -The @class@ statement and the @module@ statement and the singleton class +The `class` statement and the `module` 
statement and the singleton class definition statement, they are all implemented in similar ways. Because seeing similar things continuously three times is not interesting, this time let's -examine the @module@ statement which has the least elements (thus, is simple). +examine the `module` statement which has the least elements (thus, is simple). -First of all, what is the @module@ statement? Conversely, what should happen is -the module @statement@ ? Let's try to list up several features: +First of all, what is the `module` statement? Conversely, what should happen is +the `module` statement? Let's try to list up several features: * a new module object should be created -* the created module should be @self@ +* the created module should be `self` * it should have an independent local variable scope * if you write a constant assignment, a constant should be defined on the module * if you write a class variable assignment, a class variable should be defined on the module. -* if you write a @def@ statement, a method should be defined on the module +* if you write a `def` statement, a method should be defined on the module What is the way to achieve these things? ... is the point of this section. @@ -570,25 +573,25 @@ nd_body: ``` -@nd_cname@ seems the module name. @cname@ is probably either Const NAME or Class -NAME. I dumped several things and found that there's always @NODE_SCOPE@ in -@nd_body@. Since its member @nd_tbl@ holds a local variable table and its name -is similar to @struct SCOPE@, it appears certain that this @NODE_SCOPE@ +`nd_cname` seems the module name. `cname` is probably either Const NAME or Class +NAME. I dumped several things and found that there's always `NODE_SCOPE` in +`nd_body`. Since its member `nd_tbl` holds a local variable table and its name +is similar to `struct SCOPE`, it appears certain that this `NODE_SCOPE` plays an important role to create a local variable scope. 
-### @NODE_MODULE@ +### `NODE_MODULE` -Let's examine the handler of @NODE_MODULE@ of @rb_eval()@. The parts that are -not close to the main line, such as @ruby_raise()@ and error handling were cut +Let's examine the handler of `NODE_MODULE` of `rb_eval()`. The parts that are +not close to the main line, such as `ruby_raise()` and error handling were cut drastically. So far, there have been a lot of cutting works for 200 pages, it has already became unnecessary to show the original code. -

▼ @rb_eval()@ − @NODE_MODULE@ (simplified)

+

▼ `rb_eval()` − `NODE_MODULE` (simplified)

```TODO-lang case NODE_MODULE: @@ -612,14 +615,14 @@ case NODE_MODULE: ``` -First, we'd like to make sure the module is nested and defined above (the module holded by) @ruby_class@. -We can understand it from the fact that it calls @ruby_const_xxxx()@ on @ruby_class@. -Just once @ruby_cbase@ appears, but it is usually identical to @ruby_class@, +First, we'd like to make sure the module is nested and defined above (the module holded by) `ruby_class`. +We can understand it from the fact that it calls `ruby_const_xxxx()` on `ruby_class`. +Just once `ruby_cbase` appears, but it is usually identical to `ruby_class`, so we can ignore it. Even if they are different, it rarely causes a problem. -The first half, it is branching by @if@ because it needs to check if the +The first half, it is branching by `if` because it needs to check if the module has already been defined. This is because, in Ruby, we can do "additional" definitions on the same one module any number of times. @@ -637,35 +640,35 @@ end ``` -In this program, the two methods, @a@ and @b@, will be defined on the module @M@. +In this program, the two methods, `a` and `b`, will be defined on the module `M`. -In this case, on the second definition of @M@ the module @M@ was already set to +In this case, on the second definition of `M` the module `M` was already set to the constant, just obtaining and using it would be sufficient. If the constant -@M@ does not exist yet, it means the first definition and the module is created -(by @rb_define_module_id()@ ) +`M` does not exist yet, it means the first definition and the module is created +(by `rb_define_module_id()` ) -Lastly, @module_setup()@ is the function executing the body of a module +Lastly, `module_setup()` is the function executing the body of a module statement. Not only the module statements but the class statements and the -singleton class statements are executed by @module_setup()@. +singleton class statements are executed by `module_setup()`. 
This is the reason why I said "all of these three type of statements are similar things". -For now, I'd like you to note that @node->nd_body@ ( @NODE_SCOPE@ ) is passed as +For now, I'd like you to note that `node->nd_body` (`NODE_SCOPE`) is passed as an argument. -### @module_setup@ +### `module_setup` -For the module and class and singleton class statements, @module_setup()@ +For the module and class and singleton class statements, `module_setup()` executes their bodies. Finally, the Ruby stack manipulations will appear in large amounts. -

▼ @module_setup()@

+

▼ `module_setup()`

```TODO-lang 3424 static VALUE @@ -736,7 +739,7 @@ This is too big to read all in one gulp. Let's cut the parts that seems unnecessary. -First, the parts around @trace_func@ can be deleted unconditionally. +First, the parts around `trace_func` can be deleted unconditionally. We can see the idioms related to tags. Let's simplify them by expressing with @@ -744,26 +747,26 @@ the Ruby's ensure. Immediately after the start of the function, the argument n is purposefully -assigned to the local variable @node@, but @volatile@ is attached to @node@ and +assigned to the local variable `node`, but `volatile` is attached to `node` and it would never be assigned after that, thus this is to prevent from being -garbage collected. If we assume that the argument was @node@ from the beginning, +garbage collected. If we assume that the argument was `node` from the beginning, it would not change the meaning. -In the first half of the function, there's the part manipulating @ruby_frame@ -complicatedly. It is obviously paired up with the part @ruby_frame = frame.tmp@ +In the first half of the function, there's the part manipulating `ruby_frame` +complicatedly. It is obviously paired up with the part `ruby_frame = frame.tmp` in the last half. We'll focus on this part later, but for the time being this -can be considered as @push pop@ of @ruby_frame@. +can be considered as `push pop` of `ruby_frame`. Plus, it seems that the code (A) can be, as commented, summarized as the -initialization of @ruby_scope->local_vars@. This will be discussed later. +initialization of `ruby_scope->local_vars`. This will be discussed later. Consequently, it could be summarized as follows: -

▼ @module_setup@ (simplified)

+

▼ `module_setup` (simplified)

```TODO-lang static VALUE @@ -797,16 +800,16 @@ module_setup(module, node) ``` -It does @rb_eval()@ with @node->nd_next@, +It does `rb_eval()` with `node->nd_next`, so it's certain that this is the code of the module body. The problems are about the others. There are 5 points to see. -* Things occur on @PUSH_SCOPE() PUSH_VARS()@ +* Things occur on `PUSH_SCOPE() PUSH_VARS()` * How the local variable space is allocated -* The effect of @PUSH_CLASS@ -* The relationship between @ruby_cref@ and @ruby_frame->cbase@ -* What is done by manipulating @ruby_frame@ +* The effect of `PUSH_CLASS` +* The relationship between `ruby_cref` and `ruby_frame->cbase` +* What is done by manipulating `ruby_frame` Let's investigate them in order. @@ -817,12 +820,12 @@ Let's investigate them in order. ### Creating a local variable scope -@PUSH_SCOPE@ pushes a local variable space and @PUSH_VARS()@ pushes a block +`PUSH_SCOPE` pushes a local variable space and `PUSH_VARS()` pushes a block local variable space, thus a new local variable scope is created by these two. Let's examine the contents of these macros and what is done. -

▼ @PUSH_SCOPE() POP_SCOPE()@

+

▼ `PUSH_SCOPE() POP_SCOPE()`

```TODO-lang 852 #define PUSH_SCOPE() do { \ @@ -858,7 +861,7 @@ Let's examine the contents of these macros and what is done. ``` -As the same as tags, @SCOPE@ s also create a stack by being synchronized with the +As the same as tags, `SCOPE` s also create a stack by being synchronized with the machine stack. What differentiate slightly is that the spaces of the stack frames are allocated in the heap, the machine stack is used in order to create the stack structure (Fig.5.). @@ -870,7 +873,7 @@ Fig.5. The machine stack and the SCOPE Stack

-Additionally, the flags like @SCOPE_@ something repeatedly appearing in the +Additionally, the flags like `SCOPE_` something repeatedly appearing in the macros are not able to be explained until I finish to talk all about in what form each stack frame is remembered and about blocks. Thus, these will be discussed in Chapter 16: Blocks all at once. @@ -881,11 +884,11 @@ Thus, these will be discussed in Chapter 16: Blocks all at once. ### Allocating the local variable space -As I mentioned many times, the local variable scope is represented by @struct SCOPE@. -But @struct SCOPE@ is literally a "scope" and it does not have the real body +As I mentioned many times, the local variable scope is represented by `struct SCOPE`. +But `struct SCOPE` is literally a "scope" and it does not have the real body to store local variables. To put it more precisely, it has the pointer to a space but there's still no array at the place where the one points to. The -following part of @module_setup@ prepares the array. +following part of `module_setup` prepares the array.

▼The preparation of the local variable slots

@@ -907,33 +910,33 @@ following part of @module_setup@ prepares the array. ``` -The @TMP_ALLOC()@ at the beginning will be described in the next section. If I -put it shortly, it is "@alloca@ that is assured to allocate on the stack +The `TMP_ALLOC()` at the beginning will be described in the next section. If I +put it shortly, it is "`alloca` that is assured to allocate on the stack (therefore, we do not need to worry about GC)". -@node->nd_tbl@ holds in fact the local variable name table that has appeared -in Chapter 12: Syntax tree construction. It means that @nd_tbl[0]@ contains the table size and the rest is -an array of @ID@. This table is directly preserved to @local_tbl@ of @SCOPE@ -and @local_vars@ is allocated to store the local variable values. +`node->nd_tbl` holds in fact the local variable name table that has appeared +in Chapter 12: Syntax tree construction. It means that `nd_tbl[0]` contains the table size and the rest is +an array of `ID`. This table is directly preserved to `local_tbl` of `SCOPE` +and `local_vars` is allocated to store the local variable values. Because they are confusing, it's a good thing writing some comments such as "This is the variable name", "this is the value". -The one with @tbl@ is for the names. +The one with `tbl` is for the names.

(localvars)
-Fig.6. @ruby_scope->local_vars@ +Fig.6. `ruby_scope->local_vars`

-Where is this @node@ used? -I examined the all @local_vars@ members but could not find the access to index -@-1@ in @eval.c@. Expanding the range of files to investigate, I found the -access in @gc.c@. +Where is this `node` used? +I examined the all `local_vars` members but could not find the access to index +`-1` in `eval.c`. Expanding the range of files to investigate, I found the +access in `gc.c`. -

▼ @rb_gc_mark_children()@ — @T_SCOPE@

+

▼ `rb_gc_mark_children()` — `T_SCOPE`

```TODO-lang 815 case T_SCOPE: @@ -953,10 +956,10 @@ access in @gc.c@. ``` -Apparently, this is a mechanism to protect @node@ from GC. +Apparently, this is a mechanism to protect `node` from GC. But why is it necessary to to mark it here? -@node@ is purposefully store into the @volatile@ local variable, so -it would not be garbage-collected during the execution of @module_setup()@. +`node` is purposefully store into the `volatile` local variable, so +it would not be garbage-collected during the execution of `module_setup()`. Honestly speaking, I was thinking it might merely be a mistake for a while but @@ -964,7 +967,7 @@ it turned out it's actually very important. The issue is this at the next line of the next line: -

▼ @ruby_scope->local_tbl@

+

▼ `ruby_scope->local_tbl`

```TODO-lang 3449 ruby_scope->local_tbl = node->nd_tbl; @@ -974,44 +977,44 @@ line of the next line: The local variable name table prepared by the parser is directly used. When is -this table freed? It's the time when the @node@ become not to be referred from -anywhere. Then, when should @node@ be freed? It's the time after the @SCOPE@ +this table freed? It's the time when the `node` become not to be referred from +anywhere. Then, when should `node` be freed? It's the time after the `SCOPE` assigned on this line will disappear completely. Then, when is that? -@SCOPE@ sometimes persists longer than the statement that causes the creation +`SCOPE` sometimes persists longer than the statement that causes the creation of it. As it will be discussed at Chapter 16: Blocks, -if a @Proc@ object is created, it refers @SCOPE@. -Thus, If @module_setup()@ has finished, the @SCOPE@ created there is not +if a `Proc` object is created, it refers `SCOPE`. +Thus, If `module_setup()` has finished, the `SCOPE` created there is not necessarily be what is no longer used. That's why it's not sufficient that -@node@ is only referred from (the stack frame of) @module_setup()@. -It must be referred "directly" from @SCOPE@. +`node` is only referred from (the stack frame of) `module_setup()`. +It must be referred "directly" from `SCOPE`. -On the other hand, the @volatile node@ of the local variable cannot be removed. -Without it, @node@ is floating on air until it will be assigned to @local_vars@. +On the other hand, the `volatile node` of the local variable cannot be removed. +Without it, `node` is floating on air until it will be assigned to `local_vars`. -However then, @local_vars@ of @SCOPE@ is not safe, isn't it? -@TMP_ALLOC()@ is, as I mentioned, the allocation on the stack, it becomes -invalid at the time @module_setup()@ ends. This is in fact, at the moment when -@Proc@ is created, the allocation method is abruptly switched to @malloc()@. 
+However then, `local_vars` of `SCOPE` is not safe, isn't it? +`TMP_ALLOC()` is, as I mentioned, the allocation on the stack, it becomes +invalid at the time `module_setup()` ends. This is in fact, at the moment when +`Proc` is created, the allocation method is abruptly switched to `malloc()`. Details will be described in Chapter 16: Blocks. -Lastly, @rb_mem_clear()@ seems zero-filling but actually it is @Qnil@ -filling to -an array of @VALUE@ ( @array.c@ ). By this, all defined local variables are -initialized as @nil@. +Lastly, `rb_mem_clear()` seems zero-filling but actually it is `Qnil` -filling to +an array of `VALUE` (`array.c`). By this, all defined local variables are +initialized as `nil`. -### @TMP_ALLOC@ +### `TMP_ALLOC` -Next, let's read @TMP_ALLOC@ that allocates the local variable space. -This macro is actually paired with @TMP_PROTECT@ existing silently at the -beginning of @module_setup()@. Its typical usage is this: +Next, let's read `TMP_ALLOC` that allocates the local variable space. +This macro is actually paired with `TMP_PROTECT` existing silently at the +beginning of `module_setup()`. Its typical usage is this: @@ -1023,11 +1026,11 @@ ptr = TMP_ALLOC(size); ``` -The reason why @TMP_PROTECT@ is in the place for the local variable definitions +The reason why `TMP_PROTECT` is in the place for the local variable definitions is that ... Let's see its definition. -

▼ @TMP_ALLOC()@

+

▼ `TMP_ALLOC()`

```TODO-lang 1769 #ifdef C_ALLOCA @@ -1048,28 +1051,28 @@ is that ... Let's see its definition. ... it is because it defines a local variable. -As described in Chapter 5: Garbage collection, in the environment of @#ifdef C_ALLOCA@ (that is, -the native @alloca()@ does not exist) @malloca()@ is used to emulate @alloca()@. -However, the arguments of a method are obviously @VALUE@ s and -the GC could not find a @VALUE@ if it is stored in the heap. -Therefore, it is enforced that GC can find it through @NODE@. +As described in Chapter 5: Garbage collection, in the environment of `#ifdef C_ALLOCA` (that is, +the native `alloca()` does not exist) `malloc()` is used to emulate `alloca()`. +However, the arguments of a method are obviously `VALUE` s and +the GC could not find a `VALUE` if it is stored in the heap. +Therefore, it is enforced that GC can find it through `NODE`.

(tmpprotecttmp)
-Fig.7. anchor the space to the stack through @NODE@ +Fig.7. anchor the space to the stack through `NODE`

-On the contrary, in the environment with the true @alloca()@, we can naturally -use @alloca()@ and there's no need to use @TMP_PROTECT@. Thus, a harmless +On the contrary, in the environment with the true `alloca()`, we can naturally +use `alloca()` and there's no need to use `TMP_PROTECT`. Thus, a harmless statement is arbitrarily written. -By the way, why do they want to use @alloca()@ very much by all means. -It's merely because " @alloca()@ is faster than @malloc()@", they said. +By the way, why do they want to use `alloca()` very much by all means. +It's merely because " `alloca()` is faster than `malloc()`", they said. One can think that it's not so worth to care about such tiny difference, -but because the core of the evaluator is the biggest bottleneck of @ruby@, +but because the core of the evaluator is the biggest bottleneck of `ruby`, ... the same as above. @@ -1078,10 +1081,10 @@ but because the core of the evaluator is the biggest bottleneck of @ruby@, ### Changing the place to define methods on. -The value of the stack @ruby_class@ is the place to define a method on at the -time. Conversely, if one push a value to @ruby_class@, it changes the class to +The value of the stack `ruby_class` is the place to define a method on at the +time. Conversely, if one push a value to `ruby_class`, it changes the class to define a method on. This is exactly what is necessary for a class statement. -Therefore, It's also necessary to do @PUSH_CLASS()@ in @module_setup()@. +Therefore, It's also necessary to do `PUSH_CLASS()` in `module_setup()`. Here is the code for it: @@ -1095,11 +1098,11 @@ POP_CLASS(); ``` -Why is there the assignment to @ruby_class@ after doing @PUSH_CLASS()@. +Why is there the assignment to `ruby_class` after doing `PUSH_CLASS()`. We can understand it unexpectedly easily by looking at the definition. -

▼ @PUSH_CLASS() POP_CLASS()@

+

▼ `PUSH_CLASS() POP_CLASS()`

```TODO-lang 841 #define PUSH_CLASS() do { \ @@ -1112,13 +1115,13 @@ We can understand it unexpectedly easily by looking at the definition. ``` -Because @ruby_class@ is not modified even though @PUSH_CLASS@ is done, +Because `ruby_class` is not modified even though `PUSH_CLASS` is done, it is not actually pushed until setting by hand. Thus, these two are closer to "save and restore" rather than "push and pop". You might think that it can be a cleaner macro if passing a class as the -argument of @PUSH_CLASS()@ ... It's absolutely true, but because there are some +argument of `PUSH_CLASS()` ... It's absolutely true, but because there are some places we cannot obtain the class before pushing, it is in this way. @@ -1126,10 +1129,10 @@ places we cannot obtain the class before pushing, it is in this way. ### Nesting Classes -@ruby_cref@ represents the class nesting information at runtime. Therefore, it's -naturally predicted that @ruby_cref@ will be pushed on the module statements or +`ruby_cref` represents the class nesting information at runtime. Therefore, it's +naturally predicted that `ruby_cref` will be pushed on the module statements or on the class statements. -In @module_setup()@, it is pushed as follows: +In `module_setup()`, it is pushed as follows: @@ -1142,11 +1145,11 @@ POP_CREF(); ``` -Here, @module@ is the module being defined. -Let's also see the definitions of @PUSH_CREF()@ and @POP_CREF()@. +Here, `module` is the module being defined. +Let's also see the definitions of `PUSH_CREF()` and `POP_CREF()`. -

▼ @PUSH_CREF() POP_CREF()@

+

▼ `PUSH_CREF() POP_CREF()`

```TODO-lang 849 #define PUSH_CREF(c) \ @@ -1157,13 +1160,13 @@ Let's also see the definitions of @PUSH_CREF()@ and @POP_CREF()@. ``` -Unlike @PUSH_SCOPE@ or something, there are not any complicated techniques and +Unlike `PUSH_SCOPE` or something, there are not any complicated techniques and it's very easy to deal with. It's also not good if there's completely not any such thing. -The problem remains unsolved is what is the meaning of @ruby_frame->cbase@. -It is the information to refer a class variable or a constant from the current @FRAME@. +The problem remains unsolved is what is the meaning of `ruby_frame->cbase`. +It is the information to refer a class variable or a constant from the current `FRAME`. Details will be discussed in the last section of this chapter. @@ -1172,28 +1175,28 @@ Details will be discussed in the last section of this chapter. ### Replacing frames -Lastly, let's focus on the manipulation of @ruby_frame@. The first thing is its +Lastly, let's focus on the manipulation of `ruby_frame`. The first thing is its definition: -```TODO-lang +```c struct FRAME frame; ``` -It is not a pointer. This means that the entire @FRAME@ is allocated on the stack. +It is not a pointer. This means that the entire `FRAME` is allocated on the stack. Both the management structure of the Ruby stack and the local variable -space are on the stack, but in the case of @FRAME@ the entire struct is stored -on the stack. The extreme consumption of the machine stack by @ruby@ is the +space are on the stack, but in the case of `FRAME` the entire struct is stored +on the stack. The extreme consumption of the machine stack by `ruby` is the fruit of these "small techniques" piling up. -Then next, let's look at where doing several things with @frame@. +Then next, let's look at where doing several things with `frame`. 
-```TODO-lang +```c frame = *ruby_frame; /* copy the entire struct */ frame.tmp = ruby_frame; /* protect the original FRAME from GC */ ruby_frame = &frame; /* replace ruby_frame */ @@ -1203,25 +1206,25 @@ ruby_frame = frame.tmp; /* restore */ ``` -That is, @ruby_frame@ seems temporarily replaced (not pushing). +That is, `ruby_frame` seems temporarily replaced (not pushing). Why is it doing such thing? -I described that @FRAME@ is "pushed on method calls", but to be more precise, +I described that `FRAME` is "pushed on method calls", but to be more precise, it is the stack frame to represent "the main environment to execute a Ruby program". -You can infer it from, for instance, @ruby_frame->cbase@ which appeared previously. -@last_func@ which is "the last called method name" also suggests it. +You can infer it from, for instance, `ruby_frame->cbase` which appeared previously. +`last_func` which is "the last called method name" also suggests it. -Then, why is @FRAME@ not straightforwardly pushed? -It is because this is the place where it is not allowed to push @FRAME@. -@FRAME@ is wanted to be pushed, but if @FRAME@ is pushed, +Then, why is `FRAME` not straightforwardly pushed? +It is because this is the place where it is not allowed to push `FRAME`. +`FRAME` is wanted to be pushed, but if `FRAME` is pushed, it will appear in the backtraces of the program when an exception occurs. The backtraces are things displayed like followings: -```TODO-lang +``` % ruby t.rb t.rb:11:in `c': some error occured (ArgumentError) from t.rb:7:in `b' @@ -1277,23 +1280,23 @@ nd_defn: ``` -I dumped several things and found that there's always @NODE_SCOPE@ in @nd_defn@. -@NODE_SCOPE@ is, as we've seen at the module statements, +I dumped several things and found that there's always `NODE_SCOPE` in `nd_defn`. +`NODE_SCOPE` is, as we've seen at the module statements, the node to store the information to push a local variable scope. 
-### @NODE_DEFN@ +### `NODE_DEFN` -Subsequently, we will examine the corresponding code of @rb_eval()@. This part +Subsequently, we will examine the corresponding code of `rb_eval()`. This part contains a lot of error handlings and tedious, they are all omitted again. The way of omitting is as usual, deleting the every parts to directly or -indirectly call @rb_raise() rb_warn() rb_warning()@. +indirectly call `rb_raise() rb_warn() rb_warning()`. -

▼ @rb_eval()@ − @NODE_DEFN@ (simplified)

+

▼ `rb_eval()` − `NODE_DEFN` (simplified)

```TODO-lang NODE *defn; @@ -1318,29 +1321,29 @@ result = Qnil; ``` -In the first half, there are the words like @private@ or @protected@, so it is -probably related to visibility. @noex@, which is used as the names of flags, -seems @NOde EXposure@. Let's examine the @if@ statements in order. +In the first half, there are the words like `private` or `protected`, so it is +probably related to visibility. `noex`, which is used as the names of flags, +seems `NOde EXposure`. Let's examine the `if` statements in order. -(A) @SCOPE_TEST()@ is a macro to check if there's an argument flag in -@scope_vmode@. Therefore, the first half of this conditional statement means -"is it a @private@ scope?". -The last half means "it's @private@ if this is defining @initialize@". -The method @initialize@ to initialize an object will unquestionably become @private@. +(A) `SCOPE_TEST()` is a macro to check if there's an argument flag in +`scope_vmode`. Therefore, the first half of this conditional statement means +"is it a `private` scope?". +The last half means "it's `private` if this is defining `initialize`". +The method `initialize` to initialize an object will unquestionably become `private`. -(B) It is @protected@ if the scope is @protected@ (not surprisingly). -My feeling is that there're few cases @protected@ is required in Ruby. +(B) It is `protected` if the scope is `protected` (not surprisingly). +My feeling is that there're few cases `protected` is required in Ruby. ==(C)== This is a bug. I found this just before the submission of this book, so I couldn't fix this beforehand. In the latest code this part is probably already removed. -The original intention is to enforce the methods defined at top level to be @private@. +The original intention is to enforce the methods defined at top level to be `private`. -(D) If it is not any of the above conditions, it is @public@. +(D) If it is not any of the above conditions, it is `public`. 
Actually, there's not a thing to worth to care about until here. The important @@ -1354,27 +1357,27 @@ rb_add_method(ruby_class, node->nd_mid, defn, noex); ``` -@copy_node_scope()@ is a function to copy (only) @NODE_SCOPE@ attached to the -top of the method body. It is important that @ruby_cref@ is passed ... +`copy_node_scope()` is a function to copy (only) `NODE_SCOPE` attached to the +top of the method body. It is important that `ruby_cref` is passed ... but details will be described soon. -After copying, the definition is finished by adding it by @rb_add_method()@. -The place to define on is of course @ruby_class@. +After copying, the definition is finished by adding it by `rb_add_method()`. +The place to define on is of course `ruby_class`. -### @copy_node_scope()@ +### `copy_node_scope()` -@copy_node_scope()@ is called only from the two places: the method definition -( @NODE_DEFN@ ) and the singleton method definition ( @NODE_DEFS@ ) in @rb_eval()@. +`copy_node_scope()` is called only from the two places: the method definition +(`NODE_DEFN`) and the singleton method definition (`NODE_DEFS`) in `rb_eval()`. Therefore, looking at these two is sufficient to detect how it is used. Plus, the usages at these two places are almost the same. -

▼ @copy_node_scope()@

+

▼ `copy_node_scope()`

```TODO-lang 1752 static NODE* @@ -1398,33 +1401,33 @@ the usages at these two places are almost the same. ``` -I mentioned that the argument @rval@ is the information of the class nesting -( @ruby_cref@ ) of when the method is defined. Apparently, it is @rval@ because it -will be set to @nd_rval@. +I mentioned that the argument `rval` is the information of the class nesting +(`ruby_cref`) of when the method is defined. Apparently, it is `rval` because it +will be set to `nd_rval`. -In the main @if@ statement copies @nd_tbl@ of @NODE_SCOPE@. -It is a local variable name table in other words. The +1 at @ALLOC_N@ is to -additionally allocate the space for @nd_tbl[0]@. As we've seen in Part 2, -@nd_tbl[0]@ holds the local variables count, that was "the actual length of -@nd_tbl@ - 1". +In the main `if` statement copies `nd_tbl` of `NODE_SCOPE`. +It is a local variable name table in other words. The +1 at `ALLOC_N` is to +additionally allocate the space for `nd_tbl[0]`. As we've seen in Part 2, +`nd_tbl[0]` holds the local variables count, that was "the actual length of +`nd_tbl` - 1". -To summarize, @copy_node_scope()@ makes a copy of the @NODE_SCOPE@ which is the -header of the method body. However, @nd_rval@ is additionally set and it is the -@ruby_cref@ (the class nesting information) of when the class is defined. This +To summarize, `copy_node_scope()` makes a copy of the `NODE_SCOPE` which is the +header of the method body. However, `nd_rval` is additionally set and it is the +`ruby_cref` (the class nesting information) of when the class is defined. This information will be used later when referring constants or class variables. -### @rb_add_method()@ +### `rb_add_method()` -The next thing is @rb_add_method()@ that is the function to register a method entry. +The next thing is `rb_add_method()` that is the function to register a method entry. -

▼ @rb_add_method()@

+

▼ `rb_add_method()`

```TODO-lang 237 void @@ -1451,14 +1454,14 @@ The next thing is @rb_add_method()@ that is the function to register a method en ``` -@NEW_METHOD()@ is a macro to create @NODE@. -@rb_clear_cache_by_id()@ is a function to manipulate the method cache. +`NEW_METHOD()` is a macro to create `NODE`. +`rb_clear_cache_by_id()` is a function to manipulate the method cache. This will be explained in the next chapter "Method". -Let's look at the syntax tree which is eventually stored in @m_tbl@ of a class. -I prepared @nodedump-method@ for this kind of purposes. -(@nodedump-method@ : comes with @nodedump@. @nodedump@ is @tools/nodedump.tar.gz@ of the attached CD-ROM) +Let's look at the syntax tree which is eventually stored in `m_tbl` of a class. +I prepared `nodedump-method` for this kind of purposes. +(`nodedump-method` : comes with `nodedump`. `nodedump` is `tools/nodedump.tar.gz` of the attached CD-ROM) @@ -1493,24 +1496,24 @@ nd_body: ``` -There are @NODE_METHOD@ at the top -and @NODE_SCOPE@ previously copied by @copy_node_scope()@ at the next. +There are `NODE_METHOD` at the top +and `NODE_SCOPE` previously copied by `copy_node_scope()` at the next. These probably represent the header of a method. -I dumped several things and there's not any @NODE_SCOPE@ with the methods defined in C, +I dumped several things and there's not any `NODE_SCOPE` with the methods defined in C, thus it seems to indicate that the method is defined at Ruby level. -Additionally, at @nd_tbl@ of @NODE_SCOPE@ the parameter variable name ( @a@ ) appears. +Additionally, at `nd_tbl` of `NODE_SCOPE` the parameter variable name ( `a` ) appears. I mentioned that the parameter variables are equivalent to the local variables, and this briefly implies it. -I'll omit the explanation about @NODE_ARGS@ here +I'll omit the explanation about `NODE_ARGS` here because it will be described at the next chapter "Method". -Lastly, the @nd_cnt@ of the @NODE_METHOD@, it's not so necessary to care about -this time. 
It is used when having to do with @alias@. +Lastly, the `nd_cnt` of the `NODE_METHOD`, it's not so necessary to care about +this time. It is used when having to do with `alias`. @@ -1536,16 +1539,16 @@ so you are probably able to predict. There are the following two points: -* local variable scope is an array which is pointed by @ruby_scope->local_vars@ +* local variable scope is an array which is pointed by `ruby_scope->local_vars` * the correspondence between each local variable name and each array index has already resolved at the parser level. -Therefore, the code for the local variable reference node @NODE_LVAR@ is as +Therefore, the code for the local variable reference node `NODE_LVAR` is as follows: -

▼ @rb_eval()@ − @NODE_LVAR@

+

▼ `rb_eval()` − `NODE_LVAR`

```TODO-lang 2975 case NODE_LVAR: @@ -1559,7 +1562,7 @@ follows: ``` -It goes without saying but @node->nd_cnt@ is the value that @local_cnt()@ of the +It goes without saying but `node->nd_cnt` is the value that `local_cnt()` of the parser returns. @@ -1574,12 +1577,12 @@ parser returns. In Chapter 6: Variables and constants, I talked about in what form constants are stored and API. Constants are belong to classes and inherited as the same as methods. -As for their actual appearances, they are registered to @iv_tbl@ of -@struct RClass@ with instance variables and class variables. +As for their actual appearances, they are registered to `iv_tbl` of +`struct RClass` with instance variables and class variables. The searching path of a constant is firstly the outer class, secondly the -superclass, however, @rb_const_get()@ only searches the superclass. Why? +superclass, however, `rb_const_get()` only searches the superclass. Why? To answer this question, I need to reveal the last specification of constants. Take a look at the following code: @@ -1596,12 +1599,12 @@ end ``` -@A.new@ is a singleton method of @A@, so its class is the singleton class @(A)@. +`A.new` is a singleton method of `A`, so its class is the singleton class `(A)`. If it is interpreted by following the rule, -it cannot obtain the constant @C@ which is belongs to @A@. +it cannot obtain the constant `C` which is belongs to `A`. -But because it is written so close, to become to want refer the constant @C@ +But because it is written so close, to become to want refer the constant `C` is human nature. Therefore, such reference is possible in Ruby. It can be said that this specification reflects the characteristic of Ruby "The emphasis is on the appearance of the source code". @@ -1612,21 +1615,21 @@ by setting the place which the method definition is "written" as the start point, it refers the constant of the outer class. 
And, "the class of where the method is written" depends on its context, thus it could not be handled without the information from both the parser and -the evaluator. This is why @rb_cost_get()@ did not have the searching path of +the evaluator. This is why `rb_const_get()` did not have the searching path of the outer class. -#### @cbase@ +#### `cbase` Then, let's look at the code to refer constants including the outer class. -The ordinary constant references to which @::@ is not attached, become -@NODE_CONST@ in the syntax tree. The corresponding code in @rb_eval()@ is ... +The ordinary constant references to which `::` is not attached, become +`NODE_CONST` in the syntax tree. The corresponding code in `rb_eval()` is ... -

▼ @rb_eval()@ − @NODE_CONST@

+

▼ `rb_eval()` − `NODE_CONST`

```TODO-lang 2994 case NODE_CONST: @@ -1637,29 +1640,29 @@ The ordinary constant references to which @::@ is not attached, become ``` -First, @nd_vid@ appears to be @Variable ID@ and it probably means a constant name. -And, @ruby_frame->cbase@ is "the class where the method definition is written". +First, `nd_vid` appears to be `Variable ID` and it probably means a constant name. +And, `ruby_frame->cbase` is "the class where the method definition is written". The value will be set when invoking the method, thus the code to set has not appeared yet. -And the place where the value to be set comes from is the @nd_rval@ -that has appeared in @copy_node_scope()@ of the method definition. +And the place where the value to be set comes from is the `nd_rval` +that has appeared in `copy_node_scope()` of the method definition. I'd like you to go back a little and check that the member holds the -@ruby_cref@ of when the method is defined. +`ruby_cref` of when the method is defined. -This means, first, the @ruby_cref@ link is built when defining a class or a module. -Assume that the just defined class is @C@ (Fig.81), +This means, first, the `ruby_cref` link is built when defining a class or a module. +Assume that the just defined class is `C` (Fig.81), -Defining the method @m@ (this is probably @C#m@ ) here, -then the current @ruby_cref@ is memorized by the method entry (Fig.82). +Defining the method `m` (this is probably `C#m` ) here, +then the current `ruby_cref` is memorized by the method entry (Fig.82). -After that, when the class statement finished the @ruby_cref@ would start to -point another node, but @node->nd_rval@ naturally continues to point to the +After that, when the class statement finished the `ruby_cref` would start to +point another node, but `node->nd_rval` naturally continues to point to the same thing. 
(Fig.83) -Then, when invoking the method @C#m@, get @node->nd_rval@ -and insert into the just pushed @ruby_frame->cbase@ (Fig.84) +Then, when invoking the method `C#m`, get `node->nd_rval` +and insert into the just pushed `ruby_frame->cbase` (Fig.84) ... This is the mechanism. Complicated. @@ -1674,13 +1677,13 @@ Fig 8. CREF Trasfer -#### @ev_const_get()@ +#### `ev_const_get()` -Now, let's go back to the code of @NODE_CONST@. -Since only @ev_const_get()@ is left, we'll look at it. +Now, let's go back to the code of `NODE_CONST`. +Since only `ev_const_get()` is left, we'll look at it. -

▼ @ev_const_get()@

+

▼ `ev_const_get()`

```TODO-lang 1550 static VALUE @@ -1709,7 +1712,7 @@ Since only @ev_const_get()@ is left, we'll look at it. ``` -(( According to the errata, the description of @ev_const_get()@ was wrong. +(( According to the errata, the description of `ev_const_get()` was wrong. I omit this part for now. )) @@ -1719,19 +1722,19 @@ Since only @ev_const_get()@ is left, we'll look at it. ### Class variable -What class variables refer to is also @ruby_cref@. Needless to say, +What class variables refer to is also `ruby_cref`. Needless to say, unlike the constants which search over the outer classes one after another, it uses only the first element. -Let's look at the code of @NODE_CVAR@ which is the node to refer to a class +Let's look at the code of `NODE_CVAR` which is the node to refer to a class variable. -What is the @cvar_cbase()@ ? As @cbase@ is attached, -it is probably related to @ruby_frame->cbase@, but how do they differ? +What is the `cvar_cbase()` ? As `cbase` is attached, +it is probably related to `ruby_frame->cbase`, but how do they differ? Let's look at it. -

▼ @cvar_cbase()@

+

▼ `cvar_cbase()`

```TODO-lang 1571 static VALUE @@ -1753,7 +1756,7 @@ Let's look at it. ``` -It traverses @cbase@ up to the class that is not the singleton class, it +It traverses `cbase` up to the class that is not the singleton class, it seems. This feature is added to counter the following kind of code: @@ -1780,9 +1783,9 @@ multiple singleton methods, many people choose to write in the left side way of using the singleton class definition statement to bundle. -However, these two differs in the value of @ruby_cref@. The one using the -singleton class definition is @ruby_cref=(C)@ and the other one defining -singleton methods separately is @ruby_cref=C@. This may cause to differ in the +However, these two differs in the value of `ruby_cref`. The one using the +singleton class definition is `ruby_cref=(C)` and the other one defining +singleton methods separately is `ruby_cref=C`. This may cause to differ in the places where class variables refer to, so this is not convenient. @@ -1793,7 +1796,7 @@ is more on the usability rather than the consistency. And, when the case is a constant reference, since it searches all of the outer classes, -@C@ is included in the search path in either way, so there's no problem. +`C` is included in the search path in either way, so there's no problem. Plus, as for an assignment, since it couldn't be written inside methods in the first place, it is also not related. @@ -1822,7 +1825,7 @@ for that specification, ... understanding the correspondences one by one in such manner is the only way. -But this book is to understand the overall structure of @ruby@ and is not +But this book is to understand the overall structure of `ruby` and is not "Advanced Ruby Programming". Thus, dealing with very tiny things is not fruitful. So here, we only think about the basic structure of multiple assignment and the very simple "multiple-to-multiple" case. 
@@ -1867,12 +1870,12 @@ nd_value: ``` -Both the left-hand and right-hand sides are the lists of @NODE_ARRAY@, -there's additionally @NODE_REXPAND@ in the right side. @REXPAND@ may be "Right +Both the left-hand and right-hand sides are the lists of `NODE_ARRAY`, +there's additionally `NODE_REXPAND` in the right side. `REXPAND` may be "Right value EXPAND". We are curious about what this node is doing. Let's see. -

▼ @rb_eval()@ − @NODE_REXPAND@

+

▼ `rb_eval()` − `NODE_REXPAND`

```TODO-lang 2575 case NODE_REXPAND: @@ -1883,8 +1886,8 @@ value EXPAND". We are curious about what this node is doing. Let's see. ``` -You can ignore @avalue_to_svalue()@. -@NODE_ARRAY@ is evaluated by @rb_eval()@, (because it is the node of the array +You can ignore `avalue_to_svalue()`. +`NODE_ARRAY` is evaluated by `rb_eval()`, (because it is the node of the array literal), it is turned into a Ruby array and returned back. So, before the left-hand side is handled, all in the right-hand side are evaluated. This enables even the following code: @@ -1896,10 +1899,10 @@ a, b = b, a # swap variables in oneline ``` -Let's look at @NODE_MASGN@ in the left-hand side. +Let's look at `NODE_MASGN` in the left-hand side. -

▼ @rb_eval()@ − @NODE_MASGN@

+

▼ `rb_eval()` − `NODE_MASGN`

```TODO-lang 2923 case NODE_MASGN: @@ -1911,15 +1914,15 @@ Let's look at @NODE_MASGN@ in the left-hand side. Here is only the evaluation of the right-hand side, the rests are delegated to -@massign()@. +`massign()`. -#### @massign()@ +#### `massign()` -

▼ @massi@ ……

+

▼ `massi` ……

```TODO-lang 3917 static VALUE @@ -1935,20 +1938,20 @@ Here is only the evaluation of the right-hand side, the rests are delegated to I'm sorry this is halfway, but I'd like you to stop and pay attention to the -4th argument. @pcall@ is @Proc CALL@, this indicates whether or not the -function is used to call @Proc@ object. Between @Proc@ calls and the others +4th argument. `pcall` is `Proc CALL`, this indicates whether or not the +function is used to call `Proc` object. Between `Proc` calls and the others there's a little difference in the strictness of the check of the multiple assignments, so a flag is received to check. Obviously, the value is decided to be either 0 or 1. -Then, I'd like you to look at the previous code calling @massign()@, it was -@pcall=0@. Therefore, we probably don't mind if assuming it is @pcall=0@ for the +Then, I'd like you to look at the previous code calling `massign()`, it was +`pcall=0`. Therefore, we probably don't mind if assuming it is `pcall=0` for the time being and extracting the variables. That is, when there's an argument like -@pcall@ which is slightly changing the behavior, we always need to consider the +`pcall` which is slightly changing the behavior, we always need to consider the two patterns of scenarios, so it is really cumbersome. If there's only one -actual function @massign()@, to think as if there were two functions, @pcall=0@ -and @pcall=1@, is way simpler to read. +actual function `massign()`, to think as if there were two functions, `pcall=0` +and `pcall=1`, is way simpler to read. When writing a program we must avoid duplications as much as possible, @@ -1959,11 +1962,11 @@ There are wordings "optimize for speed" "optimize for the code size", in this case we'll "optimize for readability". -So, assuming it is @pcall=0@ and cutting the codes as much as possible and the +So, assuming it is `pcall=0` and cutting the codes as much as possible and the final appearance is shown as follows: -

▼ @massign()@ (simplified)

+

▼ `massign()` (simplified)

```TODO-lang static VALUE @@ -2008,25 +2011,25 @@ massign(self, node, val /* , pcall=0 */) ``` -@val@ is the right-hand side value. And there's the suspicious conversion called -@svalue_to_mvalue()@, since @mvalue_to_svalue()@ appeared previously and -@svalue_to_mvalue()@ in this time, so you can infer "it must be getting back". -((errata: it was @avalue_to_svalue()@ in the previous case. +`val` is the right-hand side value. And there's the suspicious conversion called +`svalue_to_mvalue()`, since `mvalue_to_svalue()` appeared previously and +`svalue_to_mvalue()` in this time, so you can infer "it must be getting back". +((errata: it was `avalue_to_svalue()` in the previous case. Therefore, it's hard to infer "getting back", but you can ignore them anyway.)) -Thus, the both are deleted. In the next line, since it uses @RARRAY()@, -you can infer that the right-hand side value is an @Array@ of Ruby. -Meanwhile, the left-hand side is @node->nd_head@, so it is the value assigned to -the local variable @list@. This @list@ is also a node ( @NODE_ARRAY@ ). +Thus, the both are deleted. In the next line, since it uses `RARRAY()`, +you can infer that the right-hand side value is an `Array` of Ruby. +Meanwhile, the left-hand side is `node->nd_head`, so it is the value assigned to +the local variable `list`. This `list` is also a node (`NODE_ARRAY`). We'll look at the code by clause. -(A) @assign@ is, as the name suggests, a function to perform an one-to-one +(A) `assign` is, as the name suggests, a function to perform an one-to-one assignment. Since the left-hand side is expressed by a node, -if it is, for instance, @NODE_IASGN@ (an assignment to an instance variable), -it assigns with @rb_ivar_set()@. -So, what it is doing here is adjusting to either @list@ and @val@ which is +if it is, for instance, `NODE_IASGN` (an assignment to an instance variable), +it assigns with `rb_ivar_set()`. 
+So, what it is doing here is adjusting to either `list` and `val` which is shorter and doing one-to-one assignments. (Fig.9) @@ -2037,14 +2040,14 @@ Fig.9. assign when corresponded (B) if there are remainders on the right-hand side, turn them into a Ruby -array and assign it into (the left-hand side expressed by) the @node->nd_args@. +array and assign it into (the left-hand side expressed by) the `node->nd_args`. -==(C)== if there are remainders on the left-hand side, assign @nil@ to all of them. +==(C)== if there are remainders on the left-hand side, assign `nil` to all of them. -By the way, the procedure which is assuming @pcall=0@ then cutting out is very +By the way, the procedure which is assuming `pcall=0` then cutting out is very similar to the data flow analytics / constant foldings used on the optimization phase of compilers. Therefore, we can probably automate it to some extent. diff --git a/spec.md b/spec.md index 6a8da40..89fd1c2 100644 --- a/spec.md +++ b/spec.md @@ -645,8 +645,8 @@ m(*[1,2,3]) # We could have written the expanded form in the first place... m(*mcall()) ``` -The @*@ argument can be used together with ordinary arguments, -but the @*@ argument must come last. +The `*` argument can be used together with ordinary arguments, +but the `*` argument must come last. Otherwise, the correspondences to parameter variables cannot be determined in a single way. @@ -1127,7 +1127,7 @@ variables `i` are different. } ``` -Inside the block the @i@ inside overshadows the @i@ outside. +Inside the block the `i` inside overshadows the `i` outside. That's why it's called shadowing. But what happens with block local variables of Ruby where there's no shadowing. @@ -1143,9 +1143,9 @@ end p i # 1 the change is preserved ``` -Even when we assign @i@ inside the block, +Even when we assign `i` inside the block, if there is the same name outside, it would be used. 
-Therefore when we assign to inside @i@, the value of outside @i@ would be +Therefore when we assign to inside `i`, the value of outside `i` would be changed. On this point there came many complains: "This is error prone. Please do shadowing." Each time there's nearly flaming but till now no conclusion was reached. @@ -1215,7 +1215,7 @@ end end ``` -@yield@ calls the block. At this point control is passed to the block, +`yield` calls the block. At this point control is passed to the block, when the execution of the block finishes it returns back to the same location. Think about it like a characteristic function call. When the present method does not have a block a runtime error will occur. @@ -1238,15 +1238,15 @@ p twice.call(9) # 18 will be printed ``` In short, it is like a function. As might be expected from the fact it is -created with @new@, the return value of @Proc.new@ is an instance -of the @Proc@ class. +created with `new`, the return value of `Proc.new` is an instance +of the `Proc` class. -@Proc.new@ looks surely like an iterator and it is indeed so. -It is an ordinary iterator. There's only some mystic mechanism inside @Proc.new@ +`Proc.new` looks surely like an iterator and it is indeed so. +It is an ordinary iterator. There's only some mystic mechanism inside `Proc.new` which turns an iterator block into an object. -Besides there is a function style method @lambda@ provided which -has the same effect as @Proc.new@. Choose whatever suits you. +Besides there is a function style method `lambda` provided which +has the same effect as `Proc.new`. Choose whatever suits you. ```TODO-lang twice = lambda {|n| n * 2 } @@ -1254,13 +1254,13 @@ twice = lambda {|n| n * 2 } #### Iterators and `Proc` -Why did we start talking all of a sudden about @Proc@? Because there -is a deep relationship between iterators and @Proc@. -In fact, iterator blocks and @Proc@ objects are quite the same thing. +Why did we start talking all of a sudden about `Proc`? 
Because there +is a deep relationship between iterators and `Proc`. +In fact, iterator blocks and `Proc` objects are quite the same thing. That's why one can be transformed into the other. -First, to turn an iterator block into a @Proc@ object -one has to put an @&@ in front of the parameter name. +First, to turn an iterator block into a `Proc` object +one has to put an `&` in front of the parameter name. ```TODO-lang def print_block( &block ) @@ -1271,12 +1271,12 @@ print_block() do end # Shows something like print_block() # Without a block nil is printed ``` -With an @&@ in front of the argument name, the block is transformed to -a @Proc@ object and assigned to the variable. If the method is not an -iterator (there's no block attached) @nil@ is assigned. +With an `&` in front of the argument name, the block is transformed to +a `Proc` object and assigned to the variable. If the method is not an +iterator (there's no block attached) `nil` is assigned. -And in the other direction, if we want to pass a @Proc@ to an iterator -we also use @&@. +And in the other direction, if we want to pass a `Proc` to an iterator +we also use `&`. ```TODO-lang block = Proc.new {|i| p i } @@ -1315,9 +1315,9 @@ always consists of some elements. ### `if` -We probably do not need to explain the @if@ expression. If the conditional +We probably do not need to explain the `if` expression. If the conditional expression is true, the body is executed. As explained in Part 1, -every object except @nil@ and @false@ is true in Ruby. +every object except `nil` and `false` is true in Ruby. ```TODO-lang if cond0 then @@ -1332,7 +1332,7 @@ end ``` `elsif`/`else`-clauses can be omitted. Each `then` as well. -But there are some finer requirements concerning @then@. +But there are some finer requirements concerning `then`. For this kind of thing, looking at some examples is the best way to understand. Here only thing I'd say is that the below codes are valid. 
@@ -1360,7 +1360,7 @@ p(if false then 1 elsif true then 2 else 3 end) #=> 2 ``` If there's no match, or the matched clause is empty, -the value would be @nil@. +the value would be `nil`. ```TODO-lang p(if false then 1 end) #=> nil @@ -1369,7 +1369,7 @@ p(if true then end) #=> nil ### `unless` -An @if@ with a negated condition is an @unless@. +An `if` with a negated condition is an `unless`. The following two expressions have the same meaning. ```TODO-lang @@ -1378,19 +1378,19 @@ unless cond then if not (cond) then end end ``` -@unless@ can also have attached @else@ clauses but any @elsif@ cannot be +`unless` can also have attached `else` clauses but any `elsif` cannot be attached. -Needless to say, @then@ can be omitted. +Needless to say, `then` can be omitted. -@unless@ also has a value and its condition to decide is completely the same as +`unless` also has a value and its condition to decide is completely the same as `if`. It means the entire value would be the value of the body of the matched clause. If there's no match or the matched clause is empty, -the value would be @nil@. +the value would be `nil`. ### `and && or ||` -The most likely utilization of the @and@ is probably a boolean operation. -For instance in the conditional expression of an @if@. +The most likely utilization of the `and` is probably a boolean operation. +For instance in the conditional expression of an `if`. ```TODO-lang if cond1 and cond2 @@ -1408,7 +1408,7 @@ invalid?(key) and return nil return nil end ``` -@&&@ and @and@ have the same meaning. Different is the binding order. +`&&` and `and` have the same meaning. Different is the binding order. ```TODO-lang method arg0 && arg1 # method(arg0 && arg1) @@ -1420,17 +1420,17 @@ Basically the symbolic operator creates an expression which can be an argument The alphabetical operator creates an expression which cannot become an argument (`expr`). 
-As for @and@, if the evaluation of the left hand side is true, +As for `and`, if the evaluation of the left hand side is true, the right hand side will also be evaluated. -On the other hand @or@ is the opposite of @and@. If the evaluation of the left hand +On the other hand `or` is the opposite of `and`. If the evaluation of the left hand side is false, the right hand side will also be evaluated. ```TODO-lang valid?(key) or return nil ``` -@or@ and @||@ have the same relationship as @&&@ and @and@. Only the precedence is +`or` and `||` have the same relationship as `&&` and `and`. Only the precedence is different. ### The Conditional Operator @@ -1461,8 +1461,8 @@ while cond do end ``` -This is the simplest loop syntax. As long as @cond@ is true -the body is executed. The @do@ can be omitted. +This is the simplest loop syntax. As long as `cond` is true +the body is executed. The `do` can be omitted. ```TODO-lang until io_ready?(id) do @@ -1470,14 +1470,14 @@ until io_ready?(id) do end ``` -@until@ creates a loop whose condition definition is opposite. +`until` creates a loop whose condition definition is opposite. As long as the condition is false it is executed. -The @do@ can be omitted. +The `do` can be omitted. Naturally there is also jump syntaxes to exit a loop. -@break@ as in C/C++/Java is also @break@, -but @continue@ is @next@. -Perhaps @next@ has come from Perl. +`break` as in C/C++/Java is also `break`, +but `continue` is `next`. +Perhaps `next` has come from Perl. ```TODO-lang i = 0 @@ -1492,7 +1492,7 @@ while true end ``` -And there is another Perlism: the @redo@. +And there is another Perlism: the `redo`. ```TODO-lang while cond @@ -1504,15 +1504,15 @@ end ``` It will return to (A) and repeat from there. -What differs from @next@ is it does not check the condition. +What differs from `next` is it does not check the condition. I might come into the world top 100, if the amount of Ruby programs -would be counted, but I haven't used @redo@ yet. 
It does not seem to be +would be counted, but I haven't used `redo` yet. It does not seem to be necessary after all because I've lived happily despite of it. ### `case` -A special form of the @if@ expression. It performs branching on a series of +A special form of the `if` expression. It performs branching on a series of conditions. The following left and right expressions are identical in meaning. ```TODO-lang @@ -1528,12 +1528,12 @@ else else end end ``` -The threefold equals @===@ is, as the same as the @==@, actually a method call. +The threefold equals `===` is, as the same as the `==`, actually a method call. Notice that the receiver is the object on the left hand side. Concretely, if it is the `===` of an `Array`, it would check if it contains the `value` as its element. If it is a `Hash`, it tests whether it has the `value` as its key. -If its is an regular expression, it tests if the @value@ matches. +If its is an regular expression, it tests if the `value` matches. And so on. Since `case` has many grammatical elements, to list them all would be tedious, thus we will not cover them in this book. @@ -1553,11 +1553,11 @@ In Ruby exceptions come in the form of the function style method `raise`. raise ArgumentError, "wrong number of argument" ``` -In Ruby exception are instances of the @Exception@ class and it's +In Ruby exception are instances of the `Exception` class and it's subclasses. This form takes an exception class as its first argument and an error message as its second argument. In the above case -an instance of @ArgumentError@ is created and "thrown". Exception -object would ditch the part after the @raise@ and start to return upwards the +an instance of `ArgumentError` is created and "thrown". Exception +object would ditch the part after the `raise` and start to return upwards the method call stack. ```TODO-lang @@ -1571,7 +1571,7 @@ raise_exception() If nothing blocks the exception it will move on and on and finally it will reach the top level. 
-When there's no place to return any more, @ruby@ gives out a message and ends +When there's no place to return any more, `ruby` gives out a message and ends with a non-zero exit code. ```TODO-lang @@ -1580,10 +1580,10 @@ raise.rb:2:in `raise_exception': wrong number of argument (ArgumentError) from raise.rb:7 ``` -However an @exit@ would be sufficient for this, and for an exception there +However an `exit` would be sufficient for this, and for an exception there should be a way to set handlers. -In Ruby, @begin@~@rescue@~@end@ is used for this. -It resembles the @try@~@catch@ in C++ and Java. +In Ruby, `begin`~`rescue`~`end` is used for this. +It resembles the `try`~`catch` in C++ and Java. ```TODO-lang def raise_exception @@ -1598,12 +1598,12 @@ rescue ArgumentError => err then end ``` -@rescue@ is a control structure which captures exceptions, it catches +`rescue` is a control structure which captures exceptions, it catches exception objects of the specified class and its subclasses. In the -above example, an instance of @ArgumentError@ comes flying into the place -where @ArgumentError@ is targeted, so it matches this @rescue@. -By @=>err@ the exception object will be assigned to the local variable -@err@, after that the @rescue@ part is executed. +above example, an instance of `ArgumentError` comes flying into the place +where `ArgumentError` is targeted, so it matches this `rescue`. +By `=>err` the exception object will be assigned to the local variable +`err`, after that the `rescue` part is executed. ```TODO-lang % ruby rescue.rb @@ -1624,9 +1624,9 @@ rescue ArgumentError => err then end ``` -We can omit the @=>err@ and the @then@ after @rescue@. We can also leave +We can omit the `=>err` and the `then` after `rescue`. We can also leave out the exception class. In this case, it means as the same as when the -@StandardError@ class is specified. +`StandardError` class is specified. If we want to catch more exception classes, we can just write them in line. 
When we want to handle different errors differently, we can specify several @@ -1643,9 +1643,9 @@ end When written in this way, a `rescue` clause that matches the exception class is searched in order from the top. Only the matched clause will be executed. -For instance, only the clause of @IOError@ will be executed in the above case. +For instance, only the clause of `IOError` will be executed in the above case. -On the other hand, when there is an @else@ clause, it is executed +On the other hand, when there is an `else` clause, it is executed only when there is no exception. ```TODO-lang @@ -1658,7 +1658,7 @@ else end ``` -Moreover an @ensure@ clause will be executed in every case: +Moreover an `ensure` clause will be executed in every case: when there is no exception, when there is an exception, rescued or not. ```TODO-lang @@ -1670,12 +1670,12 @@ ensure # this part will be executed anyway end ``` -By the way, this @begin@ expression also has a value. The value of the -whole @begin@~@end@ expression is the value of the part which was executed -last among @begin@/@rescue@/@else@ clauses. +By the way, this `begin` expression also has a value. The value of the +whole `begin`~`end` expression is the value of the part which was executed +last among `begin`/`rescue`/`else` clauses. It means the last statement of the clauses aside from `ensure`. -The reason why the @ensure@ is not counted is probably because -@ensure@ is usually used for cleanup (thus it is not a main line). +The reason why the `ensure` is not counted is probably because +`ensure` is usually used for cleanup (thus it is not a main line). ### Variables and Constants @@ -1691,20 +1691,20 @@ $gvar ``` I want to add one more thing. -Among the variables starting with @$@, +Among the variables starting with `$`, there are special kinds. They are not necessarily global variables and some have strange names. -First the Perlish variables @$_@ and @$~@. 
@$_@ saves the return -value of @gets@ and other methods, @$~@ contains the last match +First the Perlish variables `$_` and `$~`. `$_` saves the return +value of `gets` and other methods, `$~` contains the last match of a regular expression. They are incredible variables which are local variables and simultaneously thread local variables. -And the @$!@ to hold the exception object when an error is occured, -the @$?@ to hold the status of a child process, -the @$SAFE@ to represent the security level, +And the `$!` to hold the exception object when an error is occured, +the `$?` to hold the status of a child process, +the `$SAFE` to represent the security level, they are all thread local. ### Assignment @@ -1738,11 +1738,11 @@ var = var + 1 ``` it is a shortcut of this code. -Differing from C, the Ruby @+@ is a method and thus part of the library. -In C, the whole meaning of @+=@ is built in the language processor itself. -And in `C++`, @+=@ and @*=@ can be wholly overwritten, +Differing from C, the Ruby `+` is a method and thus part of the library. +In C, the whole meaning of `+=` is built in the language processor itself. +And in `C++`, `+=` and `*=` can be wholly overwritten, but we cannot do this in Ruby. -In Ruby @+=@ is always defined as an operation of the combination of @+@ and assignment. +In Ruby `+=` is always defined as an operation of the combination of `+` and assignment. We can also combine self assignment and an attribute-access-flavor method. The result more looks like an attribute. @@ -1763,18 +1763,18 @@ If there is `+=` there might also be `++` but this is not the case. Why is that so? In Ruby assignment is dealt with on the language level. But on the other hand methods are in the library. Keeping these two, the world of variables and the world of objects, strictly apart is an -important peculiarity of Ruby. If @++@ were introduced the separation -might easily be broken. That's why there's no @++@ +important peculiarity of Ruby. 
If `++` were introduced the separation +might easily be broken. That's why there's no `++` -Some people don't want to go without the brevity of @++@. It has been +Some people don't want to go without the brevity of `++`. It has been proposed again and again in the mailing list but was always turned down. -I am also in favor of @++@ but not as much as I can't do without, -and I have not felt so much needs of @++@ in Ruby in the first place, +I am also in favor of `++` but not as much as I can't do without, +and I have not felt so much needs of `++` in Ruby in the first place, so I've kept silent and decided to forget about it. ### `defined?` -@defined?@ is a syntax of a quite different color in Ruby. It tells whether an +`defined?` is a syntax of a quite different color in Ruby. It tells whether an expression value is "defined" or not at runtime. ```TODO-lang @@ -1788,7 +1788,7 @@ evaluated. That said but of course you can't write an expression causing a parse error, and it could not detect if the expression is something containing a method call which raises an error in it. -I would have loved to tell you more about @defined?@ +I would have loved to tell you more about `defined?` but it will not appear again in this book. What a pity. Statements @@ -1882,7 +1882,7 @@ Defines the class `C` which inherits from `SuperClass` We talked quite extensively about classes in Part 1. This statement will be executed, the class to be defined will -become @self@ within the statement, arbitrary expressions can be written within. Class +become `self` within the statement, arbitrary expressions can be written within. Class definitions can be nested. They form the foundation of Ruby execution image. @@ -1974,7 +1974,7 @@ ret1, ret2 = some_method() # some_method might probably return several values ``` Precisely speaking it is as follows. 
-Here we'll assume @obj@ is (the object of) the value of the left hand side, +Here we'll assume `obj` is (the object of) the value of the left hand side, * `obj` if it is an array * if its `to_ary` method is defined, it is used to convert `obj` to an array. @@ -2008,7 +2008,7 @@ obj.attr0, obj.attr1, obj.attr2 = "a", "b", "c" ``` And like with method parameters, -@*@ can be used to receive in a bundle. +`*` can be used to receive in a bundle. ```TODO-lang first, *rest = 0, 1, 2, 3, 4 @@ -2034,7 +2034,7 @@ Every time when the block is called, the `yield`ed arguments are multi-assigned to `i`. Here there's only one variable on the left hand side, so it does not look like multi assignment. But if there are two or more variables, it would a little more look like it. -For instance, @Hash#each@ is an repeated operation on the pairs of keys and values, +For instance, `Hash#each` is an repeated operation on the pairs of keys and values, so usually we call it like this: ```TODO-lang @@ -2137,11 +2137,11 @@ The program ignores it as a mere comment. ### Multi-byte strings -When the global variable @$KCODE@ is set to either @EUC@, @SJIS@ -or @UTF8@, strings encoded in euc-jp, shift_jis, or utf8 respectively can be +When the global variable `$KCODE` is set to either `EUC`, `SJIS` +or `UTF8`, strings encoded in euc-jp, shift_jis, or utf8 respectively can be used in a string of a data. -And if the option @-Ke@, @-Ks@ or @-Ku@ is given to the @ruby@ +And if the option `-Ke`, `-Ks` or `-Ku` is given to the `ruby` command multibyte strings can be used within the Ruby code. String literals, regular expressions and even operator names can contain multibyte characters. Hence it is possible to do diff --git a/syntree.md b/syntree.md index 8624bf5..27f9ff7 100644 --- a/syntree.md +++ b/syntree.md @@ -10,15 +10,15 @@ Node ---- -### @NODE@ +### `NODE` As I've already described, a Ruby program is first converted to a syntax tree. 
To be more precise, a syntax tree is a tree structure made of structs called "nodes". -In @ruby@, all nodes are of type @NODE@. +In `ruby`, all nodes are of type `NODE`. -

▼ @NODE@

+

▼ `NODE`

```TODO-lang 128 typedef struct RNode { @@ -51,32 +51,33 @@ In @ruby@, all nodes are of type @NODE@. ``` -Although you might be able to infer from the struct name @RNode@, nodes are Ruby objects. +Although you might be able to infer from the struct name `RNode`, nodes are Ruby objects. This means the creation and release of nodes are taken care of by the `ruby`'s garbage collector. -Therefore, @flags@ naturally has the same role as @basic.flags@ of the object struct. -It means that @T_NODE@ which is the type of a struct and flags such as @FL_FREEZE@ are stored in it. -As for @NODE@, in addition to these, its node type is stored in @flags@. +Therefore, `flags` naturally has the same role as `basic.flags` of the object struct. +It means that `T_NODE` which is the type of a struct and flags such as `FL_FREEZE` are stored in it. +As for `NODE`, in addition to these, its node type is stored in `flags`. What does it mean? Since a program could contain various elements -such as @if@ and @while@ and @def@ and so on, there are also various corresponding node types. +such as `if` and `while` and `def` and so on, there are also various corresponding node types. The three available union are complicated, but how these unions are used is decided to only one specific way for each node. -For example, the below table shows the case when it is @NODE_IF@ that is the node of @if@. +For example, the below table shows the case when it is `NODE_IF` that is the node of `if`. -|_. member |_. union member |_. role | -| @u1@ | @u1.node@ | the condition expression | -| @u2@ | @u2.node@ | the body of true | -| @u3@ | @u3.node@ | the body of false | +| member | union member | role | +| ------ | ------------ | ------------------------ | +| `u1` | `u1.node` | the condition expression | +| `u2` | `u2.node` | the body of true | +| `u3` | `u3.node` | the body of false | -And, in @node.h@, the macros to access each union member are available. 
+And, in `node.h`, the macros to access each union member are available. -

▼ the macros to access @NODE@

+

▼ the macros to access `NODE`

```TODO-lang 166 #define nd_head u1.node @@ -105,19 +106,19 @@ head->nd_next = tail; /* head->u3.node = tail */ In the source code, it's almost certain that these macros are used. -A very few exceptions are only the two places where creating @NODE@ in @parse.y@ -and where marking @NODE@ in @gc.c@. +A very few exceptions are only the two places where creating `NODE` in `parse.y` +and where marking `NODE` in `gc.c`. By the way, what is the reason why such macros are used? -For one thing, it might be because it's cumbersome to remember numbers like @u1@ +For one thing, it might be because it's cumbersome to remember numbers like `u1` that are not meaningful by just themselves. But what is more important than that is, there should be no problem if the corresponding number is changed and it's possible that it will actually be changed. -For example, since a condition clause of @if@ does not have to be stored in @u1@, -someone might want to change it to @u2@ for some reason. But if @u1@ is directly used, +For example, since a condition clause of `if` does not have to be stored in `u1`, +someone might want to change it to `u2` for some reason. But if `u1` is directly used, he needs to modify a lot of places all over the source codes, it is inconvenient. -Since nodes are all declared as @NODE@, it's hard to find nodes that represent @if@. +Since nodes are all declared as `NODE`, it's hard to find nodes that represent `if`. By preparing the macros to access, this kind of trouble can be avoided and conversely we can determine the node types from the macros. @@ -126,12 +127,12 @@ and conversely we can determine the node types from the macros. ### Node Type -I said that in the @flags@ of a @NODE@ struct its node type is stored. +I said that in the `flags` of a `NODE` struct its node type is stored. We'll look at in what form this information is stored. -A node type can be set by @nd_set_type()@ and obtained by @nd_type()@. 
+A node type can be set by `nd_set_type()` and obtained by `nd_type()`. -

▼ @nd_type nd_set_type@

+

▼ `nd_type nd_set_type`

```TODO-lang 156 #define nd_type(n) (((RNODE(n))->flags>>FL_USHIFT)&0xff) @@ -143,7 +144,7 @@ A node type can be set by @nd_set_type()@ and obtained by @nd_type()@. ``` -

▼ @FL_USHIFT FL_UMASK@

+

▼ `FL_USHIFT FL_UMASK`

```TODO-lang 418 #define FL_USHIFT 11 @@ -153,7 +154,7 @@ A node type can be set by @nd_set_type()@ and obtained by @nd_type()@. ``` -It won't be so much trouble if we'll keep focus on around @nd_type@. +It won't be so much trouble if we'll keep focus on around `nd_type`. Fig.1 shows how it seems like. @@ -164,10 +165,10 @@ Fig.1: The usage of RNode.flags And, since macros cannot be used from debuggers, -the @nodetype()@ function is also available. +the `nodetype()` function is also available. -

▼ @nodetype@

+

▼ `nodetype`

```TODO-lang 4247 static enum node_type @@ -186,14 +187,14 @@ the @nodetype()@ function is also available. ### File Name and Line Number -The @nd_file@ of a @NODE@ holds (the pointer to) the name of the file where the +The `nd_file` of a `NODE` holds (the pointer to) the name of the file where the text that corresponds to this node exists. Since there's the file name, we naturally expect that there's also the line number, but the corresponding member could not be found around here. Actually, the line number is being embedded to -@flags@ by the following macro: +`flags` by the following macro: -

▼ @nd_line nd_set_line@

+

▼ `nd_line nd_set_line`

```TODO-lang 160 #define NODE_LSHIFT (FL_USHIFT+8) @@ -208,21 +209,21 @@ could not be found around here. Actually, the line number is being embedded to ``` -@nd_set_line()@ is fairly spectacular. -However, as the names suggest, it is certain that @nd_set_line()@ and @nd_line@ -works symmetrically. Thus, if we first examine the simpler @nd_line()@ and grasp -the relationship between the parameters, there's no need to analyze @nd_set_line()@ +`nd_set_line()` is fairly spectacular. +However, as the names suggest, it is certain that `nd_set_line()` and `nd_line` +works symmetrically. Thus, if we first examine the simpler `nd_line()` and grasp +the relationship between the parameters, there's no need to analyze `nd_set_line()` in the first place. -The first thing is @NODE_LSHIFT@, as you can guess from the description of the -node types of the previous section, it is the number of used bits in @flags@. -@FL_USHIFT@ is reserved by system of @ruby@ (11 bits, @ruby.h@), 8 bits are for +The first thing is `NODE_LSHIFT`, as you can guess from the description of the +node types of the previous section, it is the number of used bits in `flags`. +`FL_USHIFT` is reserved by system of `ruby` (11 bits, `ruby.h`), 8 bits are for its node type. -The next thing is @NODE_LMASK@. +The next thing is `NODE_LMASK`. @@ -232,7 +233,7 @@ sizeof(NODE*) * CHAR_BIT - NODE_LSHIFT This is the number of the rest of the bits. -Let's assume it is @restbits@. This makes the code a lot simpler. +Let's assume it is `restbits`. This makes the code a lot simpler. @@ -242,18 +243,18 @@ Let's assume it is @restbits@. This makes the code a lot simpler. Fig.2 shows what the above code seems to be doing. Note that a borrow occurs -when subtracting 1. We can eventually understand that @NODE_LMASK@ is a sequence +when subtracting 1. We can eventually understand that `NODE_LMASK` is a sequence filled with 1 whose size is the number of the bits that are still available.

(lmask)
-Fig.2: @NODE_LMASK@ +Fig.2: `NODE_LMASK`

-Now, let's look at @nd_line()@ again. +Now, let's look at `nd_line()` again. @@ -263,13 +264,13 @@ Now, let's look at @nd_line()@ again. By the right shift, the unused space is shifted to the LSB. The bitwise AND -leaves only the unused space. Fig.3 shows how @flags@ is used. Since @FL_USHIFT@ +leaves only the unused space. Fig.3 shows how `flags` is used. Since `FL_USHIFT` is 11, in 32-bit machine 32-(11+8)=13 bits are available for the line number.

(flags)
-Fig.3: How @flags@ are used at @NODE@ +Fig.3: How `flags` are used at `NODE`

@@ -286,20 +287,20 @@ File.open('overflow.rb', 'w') {|f| ``` -With my 686 machine, @ruby overflow.rb@ properly displayed 1809 as a line number. +With my 686 machine, `ruby overflow.rb` properly displayed 1809 as a line number. I've succeeded. However, if you use 64-bit machine, you need to create a little bigger file in order to successfully fail. -### @rb_node_newnode()@ +### `rb_node_newnode()` -Lastly let's look at the function @rb_node_newnode()@ that creates a node. +Lastly let's look at the function `rb_node_newnode()` that creates a node. -

▼ @rb_node_newnode()@

+

▼ `rb_node_newnode()`

```TODO-lang 4228 NODE* @@ -325,26 +326,26 @@ Lastly let's look at the function @rb_node_newnode()@ that creates a node. ``` -We've seen @rb_newobj()@ in the Chapter 5: Garbage collection. It is the function to get a -vacant @RVALUE@. By attaching the @T_NODE@ struct-type flag to it, -the initialization as a @VALUE@ will complete. -Of course, it's possible that some values that are not of type @NODE*@ are -passed for @u1 u2 u3@, but received as @NODE*@ for the time being. -Since the syntax trees of @ruby@ does not contain @double@ and such, +We've seen `rb_newobj()` in the Chapter 5: Garbage collection. It is the function to get a +vacant `RVALUE`. By attaching the `T_NODE` struct-type flag to it, +the initialization as a `VALUE` will complete. +Of course, it's possible that some values that are not of type `NODE*` are +passed for `u1 u2 u3`, but received as `NODE*` for the time being. +Since the syntax trees of `ruby` does not contain `double` and such, if the values are received as pointers, it will never be too small in size. For the rest part, you can forget about the details you've learned so far, -and assume @NODE@ is +and assume `NODE` is -* @flags@ -* @nodetype@ -* @nd_line@ -* @nd_file@ -* @u1@ -* @u2@ -* @u3@ +* `flags` +* `nodetype` +* `nd_line` +* `nd_file` +* `u1` +* `u2` +* `u3` a struct type that has the above seven members. @@ -365,15 +366,15 @@ In this section, we'll look at the construction process of that syntax tree. -### @YYSTYPE@ +### `YYSTYPE` Essentially this chapter is about actions, -thus @YYSTYPE@ which is the type of @$$@ or @$1@ becomes important. -Let's look at the @%union@ of @ruby@ first. +thus `YYSTYPE` which is the type of `$$` or `$1` becomes important. +Let's look at the `%union` of `ruby` first. -

▼ @%union@ declaration

+

▼ `%union` declaration

```TODO-lang 170 %union { @@ -387,8 +388,8 @@ Let's look at the @%union@ of @ruby@ first. ``` -@struct RVarmap@ is a struct used by the evaluator and holds a block local variable. -You can tell the rest. The most used one is of course @node@. +`struct RVarmap` is a struct used by the evaluator and holds a block local variable. +You can tell the rest. The most used one is of course `node`. @@ -403,7 +404,7 @@ we should start with looking at the answer (the syntax tree). It's also nice using debuggers to observe every time, but you can visualize the syntax tree more handily -by using the tool @nodedump@ contained in the attached CD-ROM, +by using the tool `nodedump` contained in the attached CD-ROM, This tool is originally the NodeDump made by [Pragmatic Programmers](http://www.pragmaticprogrammers.com) and remodeled for this book. The original version shows quite explanatory output, @@ -411,7 +412,7 @@ but this remodeled version deeply and directly displays the appearance of the syntax tree. -For example, in order to dump the simple expression @m(a)@, you can do as follows: +For example, in order to dump the simple expression `m(a)`, you can do as follows: @@ -433,17 +434,17 @@ nd_next: ``` -The @-r@ option is used to specify the library to be load, -and the @-e@ is used to pass a program. +The `-r` option is used to specify the library to be load, +and the `-e` is used to pass a program. Then, the syntax tree expression of the program will be dumped. I'll briefly explain about how to see the content. -@NODE_NEWLINE@ and @NODE_FCALL@ and such are the node types. +`NODE_NEWLINE` and `NODE_FCALL` and such are the node types. What are written at the same indent level of each node are the contents of its node members. -For example, the root is @NODE_NEWLINE@, and it has the three members: -@nd_file nd_nth nd_next@. @nd_file@ points to the @"-e"@ string of C, -and @ng_nth@ points to the 1 integer of C, and @nd_next@ holds the next node @NODE_CALL@. 
+For example, the root is `NODE_NEWLINE`, and it has the three members: +`nd_file nd_nth nd_next`. `nd_file` points to the `"-e"` string of C, +and `nd_nth` points to the 1 integer of C, and `nd_next` holds the next node `NODE_CALL`. But since these explanation in text are probably not intuitive, I recommend you to also check Fig.4 at the same time. @@ -454,18 +455,18 @@ Fig.4: Syntax Tree

-I'll explain the meaning of each node. @NODE_CALL@ is a Function CALL. -@NODE_ARRAY@ is as its name suggests the node of array, and here it expresses -the list of arguments. @NODE_VCALL@ is a Variable or CALL, a reference to +I'll explain the meaning of each node. `NODE_CALL` is a Function CALL. +`NODE_ARRAY` is as its name suggests the node of array, and here it expresses +the list of arguments. `NODE_VCALL` is a Variable or CALL, a reference to undefined local variable will become this. -Then, what is @NODE_NEWLINE@ ? This is the node to join the name of the currently -executed file and the line number at runtime and is set for each @stmt@. +Then, what is `NODE_NEWLINE` ? This is the node to join the name of the currently +executed file and the line number at runtime and is set for each `stmt`. Therefore, when only thinking about the meaning of the execution, this node can -be ignored. When you @require@ @nodedump-short@ instead of @nodedump@, -distractions like @NODE_NEWLINE@ are left out in the first place. Since it is -easier to see if it is simple, @nodedump-short@ will be used later on except for +be ignored. When you `require` `nodedump-short` instead of `nodedump`, +distractions like `NODE_NEWLINE` are left out in the first place. Since it is +easier to see if it is simple, `nodedump-short` will be used later on except for when particularly written. @@ -485,7 +486,7 @@ tree in other words. First, let's start with the edges that are the leaves of the syntax tree. Literals and variable references and so on, among the rules, they are what -belong to @primary@ and are particularly simple even among the @primary@ rules. +belong to `primary` and are particularly simple even among the `primary` rules. @@ -497,7 +498,7 @@ nd_lit = 1:Fixnum 1 as a numeric value. There's not any twist. However, notice that what is -stored in the node is not 1 of C but 1 of Ruby (1 of @Fixnum@). This is because ... +stored in the node is not 1 of C but 1 of Ruby (1 of `Fixnum`). 
This is because ... @@ -508,11 +509,11 @@ nd_lit = 9617:Symbol ``` -This way, @Symbol@ is represented by the same @NODE_LIT@ when it becomes a syntax tree. -As the above example, @VALUE@ is always stored in @nd_lit@ so it can be handled -completely in the same way whether it is a @Symbol@ or a @Fixnum@ when executing. +This way, `Symbol` is represented by the same `NODE_LIT` when it becomes a syntax tree. +As the above example, `VALUE` is always stored in `nd_lit` so it can be handled +completely in the same way whether it is a `Symbol` or a `Fixnum` when executing. In this way, all we need to do when dealing with it are retrieving the value -in @nd_lit@ and returning it. Since we create a syntax tree in order to execute it, +in `nd_lit` and returning it. Since we create a syntax tree in order to execute it, designing it so that it becomes convenient when executing is the right thing to do. @@ -553,8 +554,8 @@ nd_next: Array. I can't say this is a leaf, but let's allow this to be here because it's -also a literal. It seems like a list of @NODE_ARRAY@ hung with each element node. -The reason why only in this case I didn't use @nodedump-short@ is ... +also a literal. It seems like a list of `NODE_ARRAY` hung with each element node. +The reason why only in this case I didn't use `nodedump-short` is ... you will understand after finishing to read this section. @@ -564,18 +565,18 @@ you will understand after finishing to read this section. Next, we'll focus on "combinations" that are branches. -@if@ will be taken as an example. +`if` will be taken as an example. -#### @if@ +#### `if` -I feel like @if@ is always used as an example, that's because its structure is -simple and there's not any reader who don't know about @if@, so it is convenient +I feel like `if` is always used as an example, that's because its structure is +simple and there's not any reader who don't know about `if`, so it is convenient for writers. -Anyway, this is an example of @if@. 
+Anyway, this is an example of `if`. For example, let's convert this code to a syntax tree. @@ -605,16 +606,16 @@ nd_else: ``` -Here, the previously described @nodedump-short@ is used, so @NODE_NEWLINE@ -disappeared. @nd_cond@ is the condition, @nd_body@ is the body of the true case, -@nd_else@ is the body of the false case. +Here, the previously described `nodedump-short` is used, so `NODE_NEWLINE` +disappeared. `nd_cond` is the condition, `nd_body` is the body of the true case, +`nd_else` is the body of the false case. Then, let's look at the code to build this. -

▼ @if@ rule

+

▼ `if` rule

```TODO-lang 1373 | kIF expr_value then @@ -630,9 +631,9 @@ Then, let's look at the code to build this. ``` -It seems that @NEW_IF()@ is the macro to create @NODE_IF@. Among the values of -the symbols, @$2 $4 $5@ are used, thus the correspondences between the symbols -of the rule and @$n@ are: +It seems that `NEW_IF()` is the macro to create `NODE_IF`. Among the values of +the symbols, `$2 $4 $5` are used, thus the correspondences between the symbols +of the rule and `$n` are: @@ -644,16 +645,16 @@ NEW_IF(expr_value, compstmt, if_tail) ``` -this way. In other words, @expr_value@ is the condition expression, @compstmt@ -(@$4@) is the case of true, @if_tail@ is the case of false. +this way. In other words, `expr_value` is the condition expression, `compstmt` +(`$4`) is the case of true, `if_tail` is the case of false. -On the other hand, the macros to create nodes are all named @NEW_xxxx@, and they -are defined @node.h@. Let's look at @NEW_IF()@. +On the other hand, the macros to create nodes are all named `NEW_xxxx`, and they +are defined `node.h`. Let's look at `NEW_IF()`. -

▼ @NEW_IF()@

+

▼ `NEW_IF()`

```TODO-lang 243 #define NEW_IF(c,t,e) rb_node_newnode(NODE_IF,c,t,e) @@ -663,21 +664,21 @@ are defined @node.h@. Let's look at @NEW_IF()@. As for the parameters, -it seems that @c@ represents condition, @t@ represents then, and @e@ represents +it seems that `c` represents condition, `t` represents then, and `e` represents else respectively. As described at the previous section, the order of members of a node is not so meaningful, so you don't need to be careful about parameter names in this kind of place. -And, the @code()@ which processes the node of the condition expression in the +And, the `code()` which processes the node of the condition expression in the action is a semantic analysis function. This will be described later. -Additionally, @fixpos()@ corrects the line number. @NODE@ is initialized with +Additionally, `fixpos()` corrects the line number. `NODE` is initialized with the file name and the line number of the time when it is "created". However, -for instance, the code of @if@ should already be parsed by @end@ by the time -when creating @NODE_IF@. Thus, the line number would go wrong if it remains -untouched. Therefore, it needs to be corrected by @fixpos()@. +for instance, the code of `if` should already be parsed by `end` by the time +when creating `NODE_IF`. Thus, the line number would go wrong if it remains +untouched. Therefore, it needs to be corrected by `fixpos()`. @@ -686,21 +687,21 @@ fixpos(dest, src) ``` -This way, the line number of the node @dest@ is set to the one of the node @src@. -As for @if@, the line number of the condition expression becomes the line number -of the whole @if@ expression. +This way, the line number of the node `dest` is set to the one of the node `src`. +As for `if`, the line number of the condition expression becomes the line number +of the whole `if` expression. -#### @elsif@ +#### `elsif` -Subsequently, let's look at the rule of @if_tail@. +Subsequently, let's look at the rule of `if_tail`. -

▼ @if_tail@

+

▼ `if_tail`

```TODO-lang 1543 if_tail : opt_else @@ -722,9 +723,9 @@ Subsequently, let's look at the rule of @if_tail@. ``` -First, this rule expresses "a list ends with @opt_else@ after zero or more -number of @elsif@ clauses". That's because, @if_tail@ appears again and again -while @elsif@ continues, it disappears when @opt_else@ comes in. We can +First, this rule expresses "a list ends with `opt_else` after zero or more +number of `elsif` clauses". That's because, `if_tail` appears again and again +while `elsif` continues, it disappears when `opt_else` comes in. We can understand this by extracting arbitrary times. @@ -738,7 +739,7 @@ if_tail: kELSIF .... kELSIF .... kELSIF .... kELSE compstmt ``` -Next, let's focus on the actions, surprisingly, @elsif@ uses the same @NEW_IF()@ as @if@. +Next, let's focus on the actions, surprisingly, `elsif` uses the same `NEW_IF()` as `if`. It means, the below two programs will lose the difference after they become syntax trees. @@ -762,21 +763,21 @@ end else Come to think of it, in C language and such, there's no distinction between the two also at the syntax level. Thus this might be a matter of course. -Alternatively, the conditional operator (@a?b:c@) becomes indistinguishable -from @if@ statement after they become syntax trees. +Alternatively, the conditional operator (`a?b:c`) becomes indistinguishable +from `if` statement after they become syntax trees. The precedences was very meaningful when it was in the context of grammar, but they become unnecessary any more because the structure of a syntax tree -contains that information. And, the difference in appearance such as @if@ and +contains that information. And, the difference in appearance such as `if` and the conditional operator become completely meaningless, its meaning (its behavior) only matters. 
-Therefore, there's perfectly no problem if @if@ and the conditional operator +Therefore, there's perfectly no problem if `if` and the conditional operator are the same in its syntax tree expression. -I'll introduce a few more examples. @add@ and @&&@ become the same. -@or@ and @||@ are also equal to each other. @not@ and @!@, @if@ and modifier @if@, +I'll introduce a few more examples. `and` and `&&` become the same. +`or` and `||` are also equal to each other. `not` and `!`, `if` and modifier `if`, and so on. These pairs also become equal to each other. @@ -786,7 +787,7 @@ and so on. These pairs also become equal to each other. By the way, the symbol of a list was always written at the left side when expressing a list -in Chapter 9: yacc crash course. However, have you noticed it becomes opposite in @if_tail@ ? +in Chapter 9: yacc crash course. However, have you noticed it becomes opposite in `if_tail` ? I'll show only the crucial part again. @@ -797,7 +798,7 @@ if_tail: opt_else ``` -Surely, it is opposite of the previous examples. @if_tail@ which is the symbol +Surely, it is opposite of the previous examples. `if_tail` which is the symbol of a list is at the right side. @@ -812,50 +813,53 @@ list: END_ITEM when you write in this way, it becomes the list that contains continuous zero -or more number of @ITEM@ and ends with @END_ITEM@. +or more number of `ITEM` and ends with `END_ITEM`. As an expression of a list, whichever is used it does not create a so much difference, but the way that the actions are executed is fatally different. -With the form that @list@ is written at the right, the actions are sequentially -executed from the last @ITEM@. We've already learned about the behavior of the -stack of when @list@ is at the left, -so let's try the case that @list@ is at the right. -The input is 4 @ITEM@ s and @END_ITEM@.
- - -| | empty at first | -| @ITEM@ | shift @ITEM@ | -| @ITEM ITEM@ | shift @ITEM@ | -| @ITEM ITEM ITEM@ | shift @ITEM@ | -| @ITEM ITEM ITEM ITEM@ | shift @ITEM@ | -| @ITEM ITEM ITEM ITEM END_ITEM@ | shift @END_ITEM@ | -| @ITEM ITEM ITEM ITEM list@ | reduce @END_ITEM@ to @list@ | -| @ITEM ITEM ITEM list@ | reduce @ITEM list@ to @list@ | -| @ITEM ITEM list@ | reduce @ITEM list@ to @list@ | -| @ITEM list@ | reduce @ITEM list@ to @list@ | -| @list@ | reduce @ITEM list@ to @list@ | -| | accept. | - - -When @list@ was at the left, shifts and reductions were done in turns. +With the form that `list` is written at the right, the actions are sequentially +executed from the last `ITEM`. We've already learned about the behavior of the +stack of when `list` is at the left, +so let's try the case that `list` is at the right. +The input is 4 `ITEM` s and `END_ITEM`. + + + +| list | action | +| ------------------------------ | ---------------------------- | +| | empty at first | +| `ITEM` | shift `ITEM` | +| `ITEM ITEM` | shift `ITEM` | +| `ITEM ITEM ITEM` | shift `ITEM` | +| `ITEM ITEM ITEM ITEM` | shift `ITEM` | +| `ITEM ITEM ITEM ITEM END_ITEM` | shift `END_ITEM` | +| `ITEM ITEM ITEM ITEM list` | reduce `END_ITEM` to `list` | +| `ITEM ITEM ITEM list` | reduce `ITEM list` to `list` | +| `ITEM ITEM list` | reduce `ITEM list` to `list` | +| `ITEM list` | reduce `ITEM list` to `list` | +| `list` | reduce `ITEM list` to `list` | +| | accept. | + + +When `list` was at the left, shifts and reductions were done in turns. This time, as you see, there are continuous shifts and continuous reductions. -The reason why @if_tail@ places "@list@ at the right" is to create a syntax tree -from the bottom up. When creating from the bottom up, the node of @if@ will be -left in hand in the end. 
But if defining @if_tail@ by placing "@list@ at the left", -in order to eventually leave the node of @if@ in hand, it needs to traverse all -links of the @elsif@ and every time @elsif@ +The reason why `if_tail` places "`list` at the right" is to create a syntax tree +from the bottom up. When creating from the bottom up, the node of `if` will be +left in hand in the end. But if defining `if_tail` by placing "`list` at the left", +in order to eventually leave the node of `if` in hand, it needs to traverse all +links of the `elsif` and every time `elsif` is found add it to the end. This is cumbersome. And, slow. -Thus, @if_tail@ is constructed in the "@list@ at the right" manner. +Thus, `if_tail` is constructed in the "`list` at the right" manner. Finally, the meaning of the headline is, in grammar terms, -"the left is @list@" is called left-recursive, -"the right is @list@" is called right-recursive. +"the left is `list`" is called left-recursive, +"the right is `list`" is called right-recursive. These terms are used mainly when reading papers about processing grammars or -writing a book of @yacc@. +writing a book of `yacc`. @@ -877,7 +881,7 @@ Let's look at how the list of statements are joined. The dump of the corresponding syntax tree is shown below. -This is not @nodedump-short@ but in the perfect form. +This is not `nodedump-short` but in the perfect form.

▼Its Syntax Tree

@@ -913,22 +917,22 @@ nd_next: ``` -We can see the list of @NODE_BLOCK@ is created and @NODE_NEWLINE@ are attached +We can see the list of `NODE_BLOCK` is created and `NODE_NEWLINE` are attached as headers. (Fig.5)

(blocklist)
-Fig.5: @NODE_BLOCK@ and @NODE_NEWLINE@ +Fig.5: `NODE_BLOCK` and `NODE_NEWLINE`

-It means, for each statement (@stmt@) @NODE_NEWLINE@ is attached, -and when they are multiple, it will be a list of @NODE_BLOCK@. +It means, for each statement (`stmt`) `NODE_NEWLINE` is attached, +and when they are multiple, it will be a list of `NODE_BLOCK`. Let's also see the code. -

▼ @stmts@

+

▼ `stmts`

```TODO-lang 354 stmts : none @@ -945,21 +949,21 @@ Let's also see the code. ``` -@newline_node()@ caps @NODE_NEWLINE@, @block_append()@ appends it to the list. +`newline_node()` caps `NODE_NEWLINE`, `block_append()` appends it to the list. It's straightforward. -Let's look at the content only of the @block_append()@. +Let's look at the content only of the `block_append()`. -#### @block_append()@ +#### `block_append()` It this function, the error checks are in the very middle and obstructive. Thus I'll show the code without that part. -

▼ @block_append()@ (omitted)

+

▼ `block_append()` (omitted)

```TODO-lang 4285 static NODE* @@ -996,16 +1000,16 @@ Thus I'll show the code without that part. ``` -According to the previous syntax tree dump, @NEW_BLOCK@ was a linked list uses @nd_next@. -Being aware of it while reading, it can be read "if either @head@ or @tail@ is not @NODE_BLOCK@, -wrap it with @NODE_BLOCK@ and join the lists each other." +According to the previous syntax tree dump, `NEW_BLOCK` was a linked list uses `nd_next`. +Being aware of it while reading, it can be read "if either `head` or `tail` is not `NODE_BLOCK`, +wrap it with `NODE_BLOCK` and join the lists each other." -Additionally, on (A-1~3), the @nd_end@ of the @NODE_BLOCK@ of the head of the -list always points to the @NODE_BLOCK@ of the tail of the list. This is probably +Additionally, on (A-1~3), the `nd_end` of the `NODE_BLOCK` of the head of the +list always points to the `NODE_BLOCK` of the tail of the list. This is probably because in this way we don't have to traverse all elements when adding an element to the tail (Fig.6). -Conversely speaking, when you need to add elements later, @NODE_BLOCK@ is suitable. +Conversely speaking, when you need to add elements later, `NODE_BLOCK` is suitable.

@@ -1026,30 +1030,32 @@ But before ending, there's one more thing I'd like to talk about. It is about the two general-purpose lists. -The two general-purpose lists mean @BLOCK@ and @LIST@. -@BLOCK@ is, as previously described, a linked list of @NODE_BLOCK@ to join the statements. -@LIST@ is, although it is called @LIST@, a list of @NODE_ARRAY@. +The two general-purpose lists mean `BLOCK` and `LIST`. +`BLOCK` is, as previously described, a linked list of `NODE_BLOCK` to join the statements. +`LIST` is, although it is called `LIST`, a list of `NODE_ARRAY`. This is what is used for array literals. -@LIST@ is used to store the arguments of a method or the list of multiple assignments. +`LIST` is used to store the arguments of a method or the list of multiple assignments. As for the difference between the two lists, looking at the usage of the nodes is helpful to understand. + -| @NODE_BLOCK@ | @nd_head@ | holding an element | -| | @nd_end@ | pointing to the @NODE_BLOCK@ of the end of the list | -| | @nd_next@ | pointing to the next @NODE_BLOCK@ | -| @NODE_ARRAY@ | @nd_head@ | holding an element | -| | @nd_alen@ | the length of the list that follows this node | -| | @nd_next@ | pointing to the next @NODE_ARRAY@ | +| `NODE_BLOCK` | `nd_head` | holding an element | +| | `nd_end` | pointing to the `NODE_BLOCK` of the end of the list | +| | `nd_next` | pointing to the next `NODE_BLOCK` | +| `NODE_ARRAY` | `nd_head` | holding an element | +| | `nd_alen` | the length of the list that follows this node | +| | `nd_next` | pointing to the next `NODE_ARRAY` | -The usage differs only in the second elements that are @nd_end@ and @nd_alen@. + +The usage differs only in the second elements that are `nd_end` and `nd_alen`. And this is exactly the significance of the existence of each type of the two nodes. 
-Since its size can be stored in @NODE_ARRAY@, we use an @ARRAY@ list +Since its size can be stored in `NODE_ARRAY`, we use an `ARRAY` list when the size of the list will frequently be required. -Otherwise, we use a @BLOCK@ list that is very fast to join. +Otherwise, we use a `BLOCK` list that is very fast to join. I don't describe this topic in details because the codes that use them is necessary to understand the significance but not shown here, but when the codes appear in Part 3, @@ -1064,7 +1070,7 @@ Semantic Analysis As I briefly mentioned at the beginning of Part 2, there are two types of analysis that are appearance analysis and semantic analysis. -The appearance analysis is mostly done by @yacc@, the rest is doing the semantic +The appearance analysis is mostly done by `yacc`, the rest is doing the semantic analysis inside actions. @@ -1078,31 +1084,31 @@ For example, there are type checks in a language that has types. Alternatively, check if variables with the same name are not defined multiple times, and check if variables are not used before their definitions, and check if the procedure being used is defined, -and check if @return@ is not used outside of procedures, and so on. +and check if `return` is not used outside of procedures, and so on. These are part of the semantic analysis. -What kind of semantic analysis is done in the current @ruby@ ? -Since the error checks occupies almost all of semantic analysis in @ruby@, +What kind of semantic analysis is done in the current `ruby` ? +Since the error checks occupies almost all of semantic analysis in `ruby`, searching the places where generating errors seems a good way. -In a parser of @yacc@, @yyerror()@ is supposed to be called when an error occurs. -Conversely speaking, there's an error where @yyerror()@ exists. -So, I made a list of the places where calling @yyerror()@ inside the actions. +In a parser of `yacc`, `yyerror()` is supposed to be called when an error occurs. 
+Conversely speaking, there's an error where `yyerror()` exists. +So, I made a list of the places where calling `yyerror()` inside the actions. * an expression not having its value (void value expression) at a place where a value is required -* an @alias@ of @$n@ -* @BEGIN@ inside of a method -* @END@ inside of a method -* @return@ outside of methods +* an `alias` of `$n` +* `BEGIN` inside of a method +* `END` inside of a method +* `return` outside of methods * a local variable at a place where constant is required -* a @class@ statement inside of a method -* an invalid parameter variable (@$gvar@ and @CONST@ and such) +* a `class` statement inside of a method +* an invalid parameter variable (`$gvar` and `CONST` and such) * parameters with the same name appear twice -* an invalid receiver of a singleton method (@def ().method@ and such) +* an invalid receiver of a singleton method (`def ().method` and such) * a singleton method definition on literals * an odd number of a list for hash literals -* an assignment to @self/nil/true/false/__FILE__/__LINE__@ +* an assignment to `self/nil/true/false/__FILE__/__LINE__` * a constant assignment inside of a method * a multiple assignment inside of a conditional expression @@ -1115,7 +1121,7 @@ These checks can roughly be categorized by each purpose as follows: * the others (pure semantic analysis) -For example, "@return@ outside of a method" is a check in order not to make the +For example, "`return` outside of a method" is a check in order not to make the rule too complex. Since this error is a problem of the structure, it can be dealt with by grammar. For example, it's possible by defining the rules separately for both inside and @@ -1124,14 +1130,14 @@ allowed respectively. But this is in any way cumbersome and rejecting it in an action is far more concise. -And, "an assignment to @self@" seems a check for the better error message. 
-In comparison to "@return@ outside of methods", rejecting it by grammar is much easier, -but if it is rejected by the parser, the output would be just @"parse error"@. +And, "an assignment to `self`" seems a check for the better error message. +In comparison to "`return` outside of methods", rejecting it by grammar is much easier, +but if it is rejected by the parser, the output would be just `"parse error"`. Comparing to it, the current -```TODO-lang +``` % ruby -e 'self = 1' -e:1: Can't change the value of self self = 1 @@ -1142,7 +1148,7 @@ this error is much more friendly. Of course, we can not always say that an arbitrary rule is exactly "for this purpose". -For example, as for "@return@ outside of methods", +For example, as for "`return` outside of methods", this can also be considered that this is a check "for the better error message". The purposes are overlapping each other. @@ -1157,9 +1163,9 @@ What is standing out instead is the cheek of an expression that has its value. To put "having its value" precisely, it is "you can obtain a value as a result of evaluating it". -@return@ and @break@ do not have values by themselves. Of course, a value is -passed to the place where @return@ to, but not any values are left at the place -where @return@ is written. +`return` and `break` do not have values by themselves. Of course, a value is +passed to the place where `return` to, but not any values are left at the place +where `return` is written. Therefore, for example, the next expression is odd, @@ -1171,16 +1177,16 @@ i = return(1) Since this kind of expressions are clearly due to misunderstanding or simple mistakes, it's better to reject when compiling. -Next, we'll look at @value_expr@ which is one of the functions to check if it takes a value. +Next, we'll look at `value_expr` which is one of the functions to check if it takes a value. -### @value_expr()@ +### `value_expr()` -@value_expr()@ is the function to check if it is an @expr@ that has a value. 
+`value_expr()` is the function to check if it is an `expr` that has a value. -

▼ @value_expr()@

+

▼ `value_expr()`

```TODO-lang 4754 static int @@ -1246,17 +1252,17 @@ Next, we'll look at @value_expr@ which is one of the functions to check if it ta Summary: It sequentially checks the nodes of the tree, if it hits "an expression certainly not having its value", it means the tree does not have any value. -Then it warns about that by using @rb_warning()@ and return @Qfalse@. If it +Then it warns about that by using `rb_warning()` and return `Qfalse`. If it finishes to traverse the entire tree without hitting any "an expression not -having its value", it means the tree does have a value. Thus it returns @Qtrue@. +having its value", it means the tree does have a value. Thus it returns `Qtrue`. Here, notice that it does not always need to check the whole tree. -For example, let's assume @value_expr()@ is called on the argument of a method. +For example, let's assume `value_expr()` is called on the argument of a method. Here: -

▼ check the value of @arg@ by using @value_expr()@

+

▼ check the value of `arg` by using `value_expr()`

```TODO-lang 1055 arg_value : arg @@ -1269,19 +1275,19 @@ Here: ``` -Inside of this argument @$1@, there can also be other nesting method calls again. +Inside of this argument `$1`, there can also be other nesting method calls again. But, the argument of the inside method must have been already checked with -@value_expr()@, so you don't have to check it again. +`value_expr()`, so you don't have to check it again. -Let's think more generally. Assume an arbitrary grammar element @A@ exists, -and assume @value_expr()@ is called against its all composing elements, -the necessity to check the element @A@ again would disappear. +Let's think more generally. Assume an arbitrary grammar element `A` exists, +and assume `value_expr()` is called against its all composing elements, +the necessity to check the element `A` again would disappear. -Then, for example, how is @if@ ? Is it possible to be handled as if @value_expr()@ +Then, for example, how is `if` ? Is it possible to be handled as if `value_expr()` has already called for all elements? If I put only the bottom line, it isn't. -That is because, since @if@ is a statement (which does not use a value), +That is because, since `if` is a statement (which does not use a value), the main body should not have to return a value. For example, in the next case: @@ -1298,7 +1304,7 @@ def method end ``` -This @if@ statement does not need a value.
+This `if` statement does not need a value.
But in the next case, its value is necessary. @@ -1314,9 +1320,9 @@ end ``` -So, in this case, the @if@ statement must be checked when checking the entire -assignment expression. This kind of things are laid out in the @switch@ -statement of @value_expr()@. +So, in this case, the `if` statement must be checked when checking the entire +assignment expression. This kind of things are laid out in the `switch` +statement of `value_expr()`. @@ -1324,7 +1330,7 @@ statement of @value_expr()@. #### Removing Tail Recursion -By the way, when looking over the whole @value_expr@, we can see that there's +By the way, when looking over the whole `value_expr`, we can see that there's the following pattern appears frequently: @@ -1351,16 +1357,16 @@ return value_expr(node->nd_xxxx) ``` -A code like this which does a recursive call just before @return@ is called a -tail recursion. It is known that this can generally be converted to @goto@. +A code like this which does a recursive call just before `return` is called a +tail recursion. It is known that this can generally be converted to `goto`. This method is often used when optimizing. As for Scheme, it is defined in specifications that tail recursions must be removed by language processors. This is because recursions are often used instead of loops in Lisp-like languages. -However, be careful that tail recursions are only when "calling just before @return@". -For example, take a look at the @NODE_IF@ of @value_expr()@, +However, be careful that tail recursions are only when "calling just before `return`". +For example, take a look at the `NODE_IF` of `value_expr()`, @@ -1372,7 +1378,7 @@ break; As shown above, the first time is a recursive call. -Rewriting this to the form of using @return@, +Rewriting this to the form of using `return`, @@ -1381,10 +1387,10 @@ return value_expr(node->nd_body) && value_expr(node->nd_else); ``` -If the left @value_expr()@ is false, the right @value_expr()@ is also executed. 
-In this case, the left @value_expr()@ is not "just before" @return@. +If the left `value_expr()` is false, the right `value_expr()` is also executed. +In this case, the left `value_expr()` is not "just before" `return`. Therefore, it is not a tail recursion. -Hence, it can't be extracted to @goto@. +Hence, it can't be extracted to `goto`. @@ -1394,7 +1400,7 @@ Hence, it can't be extracted to @goto@. As for value checks, we won't read the functions further. You might think it's too early, but all of the other functions are, as the same -as @value_expr()@, step-by-step one-by-one only traversing and checking nodes, +as `value_expr()`, step-by-step one-by-one only traversing and checking nodes, so they are completely not interesting. However, I'd like to cover the whole picture at least, so I finish this section by just showing the call graph of the relevant functions (Fig.7). @@ -1437,9 +1443,9 @@ p lvar # being defined ``` -In this case, as the assignment to @lvar@ is written at the first line, -in this moment @lvar@ is defined. -When it is undefined, it ends up with a runtime exception @NameError@ as follows: +In this case, as the assignment to `lvar` is written at the first line, +in this moment `lvar` is defined. +When it is undefined, it ends up with a runtime exception `NameError` as follows: @@ -1450,10 +1456,10 @@ for # (NameError) ``` -Why does it say @"local variable or method"@? +Why does it say `"local variable or method"`? As for methods, the parentheses of the arguments can be omitted when calling, so when there's not any arguments, it can't be distinguished from local variables. -To resolve this situation, @ruby@ tries to call it as a method when it finds +To resolve this situation, `ruby` tries to call it as a method when it finds an undefined local variable. Then if the corresponding method is not found, it generates an error such as the above one. @@ -1486,9 +1492,9 @@ lvar = nil # although appearing here ... 
Be careful about the point of "in the symbol sequence". It has completely nothing to do with the order of evaluations. For example, for the next code, naturally the condition expression is evaluated first, -but in the symbol sequence, at the moment when @p@ appears the assignment -to @lvar@ has not appeared yet. -Therefore, this produces @NameError@. +but in the symbol sequence, at the moment when `p` appears the assignment +to `lvar` has not appeared yet. +Therefore, this produces `NameError`. @@ -1500,9 +1506,9 @@ p(lvar) if lvar = true What we've learned by now is that the local variables are extremely influenced by the appearances. When a symbol sequence that expresses an assignment appears, it will be defined in the appearance order. Based on this information, we can -infer that @ruby@ seems to define local variables while parsing because the +infer that `ruby` seems to define local variables while parsing because the order of the symbol sequence does not exist after leaving the parser. -And in fact, it is true. In @ruby@, the parser defines local variables. +And in fact, it is true. In `ruby`, the parser defines local variables. @@ -1522,10 +1528,10 @@ We'll look at how is the difference from now on. ### The data structure -We'll start with the local variable table @struct local_vars@. +We'll start with the local variable table `struct local_vars`. -

▼ @struct local_vars@

+

▼ `struct local_vars`

```TODO-lang 5174 static struct local_vars { @@ -1541,18 +1547,18 @@ We'll start with the local variable table @struct local_vars@. ``` -The member name @prev@ indicates that the @struct local_vars@ is a +The member name `prev` indicates that the `struct local_vars` is a opposite-direction linked list. ... Based on this, we can expect a stack. -The simultaneously declared global variable @lvtbl@ points to @local_vars@ that +The simultaneously declared global variable `lvtbl` points to `local_vars` that is the top of that stack. -And, @struct RVarmap@ is defined in @env.h@, +And, `struct RVarmap` is defined in `env.h`, and is available to other files and is also used by the evaluator. This is used to store the block local variables. -

▼ @struct RVarmap@

+

▼ `struct RVarmap`

```TODO-lang 52 struct RVarmap { @@ -1567,9 +1573,9 @@ This is used to store the block local variables. -Since there's @struct RBasic@ at the top, this is a Ruby object. +Since there's `struct RBasic` at the top, this is a Ruby object. It means it is managed by the garbage collector. -And since it is joined by the @next@ member, it is probably a linked list. +And since it is joined by the `next` member, it is probably a linked list. Based on the observation we've done and the information that will be explained, @@ -1587,14 +1593,14 @@ Fig.8: The image of local variable tables at runtime ### Local Variable Scope -When looking over the list of function names of @parse.y@, -we can find functions such as @local_push() local_pop() local_cnt()@ are laid out. +When looking over the list of function names of `parse.y`, +we can find functions such as `local_push() local_pop() local_cnt()` are laid out. In whatever way of thinking, they appear to be relating to a local variable. -Moreover, because the names are @push pop@, it is clearly a stack. +Moreover, because the names are `push pop`, it is clearly a stack. So first, let's find out the places where using these functions. -

▼ @local_push() local_pop()@ used examples

+

▼ `local_push() local_pop()` used examples

```TODO-lang 1475 | kDEF fname @@ -1623,20 +1629,20 @@ So first, let's find out the places where using these functions. ``` -At @def@, I could find the place where it is used. It can also be found in class +At `def`, I could find the place where it is used. It can also be found in class definitions and singleton class definitions, and module definitions. In other words, it is the place where the scope of local variables is cut. Moreover, as for how they are used, -it does @push@ where the method definition starts and does @pop@ when the definition ends. +it does `push` where the method definition starts and does `pop` when the definition ends. This means, as we expected, it is almost certain that the functions start with -@local_@ are relating to local variables. And it is also revealed that the part -between @push@ and @pop@ is probably a local variable scope. +`local_` are relating to local variables. And it is also revealed that the part +between `push` and `pop` is probably a local variable scope. -Moreover, I also searched @local_cnt()@. +Moreover, I also searched `local_cnt()`. -

▼ @NEW_LASGN()@

+

▼ `NEW_LASGN()`

```TODO-lang 269 #define NEW_LASGN(v,val) rb_node_newnode(NODE_LASGN,v,val,local_cnt(v)) @@ -1645,19 +1651,19 @@ Moreover, I also searched @local_cnt()@. ``` -This is found in @node.h@. Even though there are also the places where using in @parse.y@, +This is found in `node.h`. Even though there are also the places where using in `parse.y`, I found it in the other file. Thus, probably I'm in desperation. -This @NEW_LASGN@ is "new local assignment". This should mean the node of an +This `NEW_LASGN` is "new local assignment". This should mean the node of an assignment to a local variable. And also considering the place where using it, -the parameter @v@ is apparently the local variable name. @val@ is probably +the parameter `v` is apparently the local variable name. `val` is probably (a syntax tree that represents). the right-hand side value -Based on the above observations, @local_push()@ is at the beginning of the local variable, -@local_cnt()@ is used to add a local variable if there's a local variable assignment in the halfway, -@local_pop()@ is used when ending the scope. +Based on the above observations, `local_push()` is at the beginning of the local variable, +`local_cnt()` is used to add a local variable if there's a local variable assignment in the halfway, +`local_pop()` is used when ending the scope. This perfect scenario comes out. (Fig.9) @@ -1670,10 +1676,10 @@ Fig.9: the flow of the local variable management Then, let's look at the content of the function. -### @push@ and @pop@ +### `push` and `pop` -

▼ @local_push()@

+

▼ `local_push()`

```TODO-lang 5183 static void @@ -1701,15 +1707,15 @@ Then, let's look at the content of the function. ``` -As we expected, it seems that @struct local_vars@ is used as a stack. -Also, we can see @lvtbl@ is pointing to the top of the stack. -The lines relates to @rb_dvar_push()@ will be read later, so it is left untouched for now. +As we expected, it seems that `struct local_vars` is used as a stack. +Also, we can see `lvtbl` is pointing to the top of the stack. +The lines related to `rb_dvar_push()` will be read later, so they are left untouched for now. -Subsequently, we'll look at @local_pop()@ and @local_tbl()@ at the same time. +Subsequently, we'll look at `local_pop()` and `local_tbl()` at the same time. -

▼ @local_tbl local_pop@

+

▼ `local_tbl local_pop`

```TODO-lang 5218 static ID* @@ -1737,27 +1743,27 @@ Subsequently, we'll look at @local_pop()@ and @local_tbl()@ at the same time. ``` -I'd like you to look at @local_tbl()@. -This is the function to obtain the current local variable table (@lvtbl->tbl@). -By calling this, the @nofree@ of the current table becomes true. -The meaning of @nofree@ seems naturally "Don't @free()@". +I'd like you to look at `local_tbl()`. +This is the function to obtain the current local variable table (`lvtbl->tbl`). +By calling this, the `nofree` of the current table becomes true. +The meaning of `nofree` seems naturally "Don't `free()`". In other words, this is like reference counting, "this table will be used, so -please don't @free()@". Conversely speaking, -when @local_tbl()@ was not called with a table even once, +please don't `free()`". Conversely speaking, +when `local_tbl()` was not called with a table even once, that table will be freed at the moment when being popped and be discarded. For example, this situation probably happens when a method without any local variables. -However, the "necessary table" here means @lvtbl->tbl@. -As you can see, @lvtbl@ itself will be freed at the same moment when being popped. -It means only the generated @lvtbl->tbl@ is used in the evaluator. -Then, the structure of @lvtbl->tbl@ is becoming important. -Let's look at the function @local_cnt()@ (which seems) to add variables +However, the "necessary table" here means `lvtbl->tbl`. +As you can see, `lvtbl` itself will be freed at the same moment when being popped. +It means only the generated `lvtbl->tbl` is used in the evaluator. +Then, the structure of `lvtbl->tbl` is becoming important. +Let's look at the function `local_cnt()` (which seems) to add variables which is probably helpful to understand how the structure is. -And before that, I'd like you to remember that @lvtbl->cnt@ is stored -at the index 0 of the @lvtbl->tbl@. 
+And before that, I'd like you to remember that `lvtbl->cnt` is stored +at the index 0 of the `lvtbl->tbl`. @@ -1765,10 +1771,10 @@ at the index 0 of the @lvtbl->tbl@. ### Adding variables -The function (which seems) to add a local variable is @local_cnt()@. +The function (which seems) to add a local variable is `local_cnt()`. -

▼ @local_cnt()@

+

▼ `local_cnt()`

```TODO-lang 5246 static int @@ -1789,17 +1795,17 @@ The function (which seems) to add a local variable is @local_cnt()@. ``` -This scans @lvtbl->tbl@ and searches what is equals to @id@. -If the searched one is found, it straightforwardly returns @cnt-1@. -If nothing is found, it does @local_append()@. -@local_append()@ must be, as it is called @append@, the procedure to append. -In other words, @local_cnt()@ checks if the variable was already registered, -if it was not, adds it by using @local_append()@ and returns it. +This scans `lvtbl->tbl` and searches for what is equal to `id`. +If the searched one is found, it straightforwardly returns `cnt-1`. +If nothing is found, it does `local_append()`. +`local_append()` must be, as it is called `append`, the procedure to append. +In other words, `local_cnt()` checks if the variable was already registered, +if it was not, adds it by using `local_append()` and returns it. -What is the meaning of the return value of this function? @lvtbl->tbl@ seems an +What is the meaning of the return value of this function? `lvtbl->tbl` seems an array of the variables, so there're one-to-one correspondences between the -variable names and "their index - 1 (@cnt-1@)". (Fig.10) +variable names and "their index - 1 (`cnt-1`)". (Fig.10)

@@ -1817,17 +1823,17 @@ If it is not, like the instance variables or constants, You might want to know why it is avoiding index 0 (the loop start -from @cnt=1@) for some reasons, it is probably to store a value at @local_pop()@. +from `cnt=1`) for some reasons, it is probably to store a value at `local_pop()`. -Based on the knowledge we've learned, we can understand the role of @local_append()@ +Based on the knowledge we've learned, we can understand the role of `local_append()` without actually looking at the content. It registers a local variable and returns -"(the index of the variable in @lvtbl->tbl@) - 1". +"(the index of the variable in `lvtbl->tbl`) - 1". It is shown below, let's make sure. -

▼ @local_append()@

+

▼ `local_append()`

```TODO-lang 5225 static int @@ -1855,25 +1861,25 @@ It is shown below, let's make sure. ``` -It seems definitely true. @lvtbl->tbl@ is an array of the local variable names, +It seems definitely true. `lvtbl->tbl` is an array of the local variable names, and its index - 1 is the return value (local variable ID). -Note that it increases @lvtbl->cnt@. -Since the code to increase @lvtbl->cnt@ only exists here, +Note that it increases `lvtbl->cnt`. +Since the code to increase `lvtbl->cnt` only exists here, from only this code its meaning can be decided. Then, what is the meaning? It is, -since "@lvtbl->cnt@ increases by 1 when a new variable is added", -"@lvtbl->cnt@ holds the number of local variables in this scope". +since "`lvtbl->cnt` increases by 1 when a new variable is added", +"`lvtbl->cnt` holds the number of local variables in this scope". -Finally, I'll explain about @tbl[1]@ and @tbl[2]@. These @'_'@ and @'~'@ are, +Finally, I'll explain about `tbl[1]` and `tbl[2]`. These `'_'` and `'~'` are, as you can guess if you are familiar with Ruby, -the special variables named @$_@ and @$~@. +the special variables named `$_` and `$~`. Though their appearances are identical to global variables, they are actually local variables. -Even If you didn't explicitly use it, when the methods such as @Kernel#gets@ are called, +Even If you didn't explicitly use it, when the methods such as `Kernel#gets` are called, these variables are implicitly assigned, thus it's necessary that the spaces are always allocated. @@ -1888,13 +1894,13 @@ let's summarize it. First, It seems the local variables are different from the other variables -because they are not managed with @st_table@. +because they are not managed with `st_table`. Then, where are they stored in? It seems the answer is an array. Moreover, it is stored in a different array for each scope. -The array is @lvtbl->tbl@, and the index 0 holds the @lvtbl->cnt@ which is set -at @local_pop()@. 
In other words, it holds the number of the local variables. +The array is `lvtbl->tbl`, and the index 0 holds the `lvtbl->cnt` which is set +at `local_pop()`. In other words, it holds the number of the local variables. The index 1 or more hold the local variable names defined in the scope. Fig.11 shows the final appearance we expect. @@ -1911,16 +1917,16 @@ Fig.11: correspondences between local variable names and the return values ### Block Local Variables -The rest is @dyna_vars@ which is a member of @struct local_vars@. +The rest is `dyna_vars` which is a member of `struct local_vars`. In other words, this is about the block local variables. I thought that there must be the functions to do something with this, looked over the list of the function names, and found them as expected. -There are the suspicious functions named @dyna_push() dyna_pop() dyna_in_block()@. +There are the suspicious functions named `dyna_push() dyna_pop() dyna_in_block()`. Moreover, here is the place where these are used. -

▼ an example using @dyna_push dyna_pop@

+

▼ an example using `dyna_push dyna_pop`

```TODO-lang 1651 brace_block : '{' @@ -1939,13 +1945,13 @@ Moreover, here is the place where these are used. ``` -@push@ at the beginning of an iterator block, @pop@ at the end. +`push` at the beginning of an iterator block, `pop` at the end. This must be the process of block local variables. Now, we are going to look at the functions. -

▼ @dyna_push()@

+

▼ `dyna_push()`

```TODO-lang 5331 static struct RVarmap* @@ -1962,12 +1968,12 @@ Now, we are going to look at the functions. ``` -Increasing @lvtbl->dlev@ seems the mark indicates the existence of the block +Increasing `lvtbl->dlev` seems to be the mark that indicates the existence of the block local variable scope. -Meanwhile, @rb_dvar_push()@ is ... +Meanwhile, `rb_dvar_push()` is ... -

▼ @rb_dvar_push()@

+

▼ `rb_dvar_push()`

```TODO-lang 691 void @@ -1982,26 +1988,26 @@ Meanwhile, @rb_dvar_push()@ is ... ``` -It creates a @struct RVarmap@ that has the variable name @id@ and the value -@val@ as its members, adds it to the top of the global variable @ruby_dyna_vars@. +It creates a `struct RVarmap` that has the variable name `id` and the value +`val` as its members, adds it to the top of the global variable `ruby_dyna_vars`. This is again and again the form of cons. -In @dyna_push()@, @ruby_dyan_vars@ is not set aside, -it seems it adds directly to the @ruby_dyna_vars@ of the previous scope. +In `dyna_push()`, `ruby_dyna_vars` is not set aside, +it seems it adds directly to the `ruby_dyna_vars` of the previous scope. -Moreover, the value of the @id@ member of the @RVarmap@ to be added here is 0. +Moreover, the value of the `id` member of the `RVarmap` to be added here is 0. Although it was not seriously discussed in this book, -the @ID@ of @ruby@ will never be 0 while it is normally created by @rb_intern()@. -Thus, we can infer that this @RVarmap@, as it is like @NUL@ or @NULL@, +the `ID` of `ruby` will never be 0 while it is normally created by `rb_intern()`. +Thus, we can infer that this `RVarmap`, as it is like `NUL` or `NULL`, probably has a role as sentinel. If we think based on this assumption, we can describe the reason why the holder -of a variable (@RVarmap@) is added even though not any variables are added. +of a variable (`RVarmap`) is added even though not any variables are added. -Next, @dyna_pop()@. +Next, `dyna_pop()`. -

▼ @dyna_pop()@

+

▼ `dyna_pop()`

```TODO-lang 5341 static void @@ -2016,18 +2022,18 @@ Next, @dyna_pop()@. ``` -By reducing @lvtbl->dlev@, it writes down the fact that the block local +By reducing `lvtbl->dlev`, it writes down the fact that the block local variable scope ended. It seems that something is done by using the argument, let's see this later at once. The place to add a block local variable has not appeared yet. -Something like @local_cnt()@ of local variables is missing. -So, I did plenty of @grep@ with @dvar@ and @dyna@, and this code was found. +Something like `local_cnt()` of local variables is missing. +So, I did plenty of `grep` with `dvar` and `dyna`, and this code was found. -

▼ @assignable()@ (partial)

+

▼ `assignable()` (partial)

```TODO-lang 4599 static NODE* @@ -2043,27 +2049,27 @@ So, I did plenty of @grep@ with @dvar@ and @dyna@, and this code was found. ``` -@assignable()@ is the function to create a node relates to assignments, +`assignable()` is the function to create a node relates to assignments, this citation is the fragment of that function only contains the part to deal with block local variables. -It seems that it adds a new variable (to @ruby_dyna_vars@) -by using @rb_dvar_push()@ that we've just seen. +It seems that it adds a new variable (to `ruby_dyna_vars`) +by using `rb_dvar_push()` that we've just seen. -### @ruby_dyna_vars@ in the parser +### `ruby_dyna_vars` in the parser Now, taking the above all into considerations, let's imagine the appearance of -@ruby_dyna_vars@ at the moment when a local variable scope is finished to be +`ruby_dyna_vars` at the moment when a local variable scope is finished to be parsed. First, as I said previously, -the @RVarmap@ of @id=0@ which is added at the beginning of a block scope is a +the `RVarmap` of `id=0` which is added at the beginning of a block scope is a sentinel which represents a break between two block scopes. -We'll call this "the header of @ruby_dyna_vars@". +We'll call this "the header of `ruby_dyna_vars`". Next, among the previously shown actions of the rule of the iterator block, @@ -2079,9 +2085,9 @@ dyna_pop($2); /* …… appears at $2 */ ``` -@dyna_push()@ returns the @ruby_dyna_vars@ at the moment. -@dyna_pop()@ put the argument into @ruby_dyna_vars@. -This means @ruby_dyna_vars@ would be saved and restored for each the block local +`dyna_push()` returns the `ruby_dyna_vars` at the moment. +`dyna_pop()` put the argument into `ruby_dyna_vars`. +This means `ruby_dyna_vars` would be saved and restored for each the block local variable scope. Therefore, when parsing the following program, @@ -2107,12 +2113,12 @@ iter { ``` -Fig.12 shows the @ruby_dyna_vars@ in this situation. 
+Fig.12 shows the `ruby_dyna_vars` in this situation.

(dynavars)
-Fig.12: @ruby_dyna_vars@ when all scopes are finished to be parsed +Fig.12: `ruby_dyna_vars` when all scopes are finished to be parsed

@@ -2123,7 +2129,7 @@ This way has the simpler searching process than creating a different table for each level. -Plus, in the figure, it looks like @bb@ is hung at a strange place, +Plus, in the figure, it looks like `bb` is hung at a strange place, but this is correct. When a variable is found at the nest level which is decreased after increased once, it is attached to the subsequent of the list of the original level. @@ -2133,11 +2139,11 @@ is expressed in a natural form. And finally, at each cut of local variable scopes (this is not of block local -variable scopes), this link is entirely saved or restored to @lvtbl->dyna_vars@. -I'd like you to go back a little and check @local_push()@ and @local_pop()@. +variable scopes), this link is entirely saved or restored to `lvtbl->dyna_vars`. +I'd like you to go back a little and check `local_push()` and `local_pop()`. -By the way, although creating the @ruby_dyna_vars@ list was a huge task, +By the way, although creating the `ruby_dyna_vars` list was a huge task, it is by itself not used at the evaluator. This list is used only to check the existence of the variables and will be garbage collected at the same moment when parsing is finished. 
And after entering the evaluator, another chain is From b1229c08132a501cb4e1578bd34037c815c1f374 Mon Sep 17 00:00:00 2001 From: Ulysse Buonomo Date: Sun, 18 Apr 2021 17:05:11 +0200 Subject: [PATCH 11/14] fix all figures --- anyeval.md | 18 ++++----- class.md | 2 +- contextual.md | 4 +- evaluator.md | 54 +++++++++---------------- gc.md | 4 +- intro.md | 2 +- iterator.md | 6 +-- load.md | 19 +++++---- minimum.md | 2 +- module.md | 84 +++++++++++++++++--------------------- name.md | 2 +- object.md | 10 ++--- parser.md | 4 +- syntree.md | 109 ++++++++++++++++++++++---------------------------- thread.md | 34 ++++++++-------- yacc.md | 12 +++--- 16 files changed, 161 insertions(+), 205 deletions(-) diff --git a/anyeval.md b/anyeval.md index 67522a2..1912fb2 100644 --- a/anyeval.md +++ b/anyeval.md @@ -503,11 +503,11 @@ It means, it would look like Figure 1. Now, it is assured that we can refer to the block local variables of the outside scope from inside of a string to `eval`. +
+ figure 1: `ruby_dyna_vars` inside `eval` +
figure 1: ruby_dyna_vars inside eval
+
-
-(dynavars)
-Figure 1: `ruby_dyna_vars` inside `eval` -
Well, it's sure we can refer to, @@ -590,12 +590,10 @@ that part is defined as `specific_eval()`. Figure 2 shows it and also what will be described. What with parentheses are calls by function pointers. - -

-(speceval)
-Figure 2: Call Graph -

- +
+ figure 2: Call Graph +
figure 2: Call Graph
+
Whichever `instance_eval` or `module_eval`, it can accept both a block and a string, diff --git a/class.md b/class.md index 996a176..1db8410 100644 --- a/class.md +++ b/class.md @@ -664,7 +664,7 @@ vertical direction is inheritance (the superclasses are above).
figure 1: `rb_singleton_class` -
figure 1: `rb_singleton_class`
+
figure 1: rb_singleton_class
When comparing the first and last part of this diagram, you can diff --git a/contextual.md b/contextual.md index 2567dcf..5b6d95a 100644 --- a/contextual.md +++ b/contextual.md @@ -177,7 +177,7 @@ which case suits best (see image 1)
figure 1: Transition to `EXPR_BEG` -
figure 1: Transition to `EXPR_BEG`
+
figure 1: Transition to EXPR_BEG
((errata:
@@ -1161,7 +1161,7 @@ will appear from `tLPAREN_ARG` and conflict with `method_call` (see image 3)
figure 3: `method_call` and `command_call` -
figure 3: `method_call` and `command_call`
+
figure 3: method_call and command_call
### The case of two parameters and more diff --git a/evaluator.md b/evaluator.md index 636cd63..6fb9bfb 100644 --- a/evaluator.md +++ b/evaluator.md @@ -807,24 +807,19 @@ The ordinary functions return only once for each call. However, it's possible `setjmp()` returns twice. Is it helpful to grasp the concept if I say that it is something like `fork()`? - - -

TODO -(setjmp)
-Fig.2: `setjmp()` `longjmp()` Image -

- +
+ figure 2: `setjmp()` `longjmp()` Image +
figure 2: setjmp() longjmp() Image
+
Now, we've learned about `setjmp()` as a preparation. In `eval.c`, `EXEC_TAG` corresponds to `setjmp()` and `JUMP_TAG()` corresponds to `longjmp()` respectively. (Fig.3) - -

-(jumptag)
-Fig.3: "tag jump" image -

- +
+ figure 3: "tag jump" image +
figure 3: "tag jump" image
+
Take a look at this image, it seems that `EXEC_TAG()` does not have any arguments. Where has `jmp_buf` gone? @@ -905,12 +900,10 @@ and its cost of the memory allocation is next to nothing. This technique is only possible because the `ruby` evaluator is made of recursive calls of `rb_eval()`. - - -

-(tagstack)
-Fig.4: the tag stack is embedded in the machine stack -

+
+ figure 4: the tag stack is embedded in the machine stack +
figure 4: the tag stack is embedded in the machine stack
+
Because of this implementation, it's necessary that `PUSH_TAG` and `POP_TAG` @@ -948,11 +941,10 @@ After that, it executes the main body by calling `rb_eval()` recursively. If there's `break` or `next`, it does `JUMP_TAG()` (`longjmp`). Then, it can go back to the start point of the `while` loop. (Fig.5) - -

TODO -(whilejmp)
-Fig.5: the implementation of `while` by using "tag jump" -

+
+ figure 5: the implementation of `while` by using "tag jump" +
figure 5: the implementation of while by using "tag jump"
+
Though `break` was taken as an example here, what cannot be implemented without @@ -1260,18 +1252,12 @@ the return value can be passed to the next tag without particular thought. ``` - This can probably be depicted as Fig.6. - -

-(usetag)
-Fig.6: Transferring the return value -

- - - - +
+ figure 6: Transferring the return value +
figure 6: Transferring the return value
+
Exception diff --git a/gc.md b/gc.md index 8a24829..ccaf115 100644 --- a/gc.md +++ b/gc.md @@ -431,7 +431,7 @@ Each element of `heap` is each `slot` (Figure 9).
figure 9: `heaps`, `heap`, `slot` -
figure 9: `heaps`, `heap`, `slot`
+
figure 9: heaps, heap, slot
The length of `heaps` is `heap_length` and it can be changed. The number of @@ -441,7 +441,7 @@ Figure 10 shows the structure of the object heap.
figure 10: conceptual diagram of `heaps` in memory -
figure 10: conceptual diagram of `heaps` in memory
+
figure 10: conceptual diagram of heaps in memory
This structure has a necessity to be this way. diff --git a/intro.md b/intro.md index d813cf7..93b0505 100644 --- a/intro.md +++ b/intro.md @@ -965,7 +965,7 @@ it would be like Figure 1.
figure 1: The process until `Makefile` is created -
figure 1: The process until `Makefile` is created
+
figure 1: The process until Makefile is created
For the readers who want to know more details, diff --git a/iterator.md b/iterator.md index c083628..1824c3a 100644 --- a/iterator.md +++ b/iterator.md @@ -147,7 +147,7 @@ It's possible that the block is pushed for the previous method. (Figure 1)
figure 1: no one-to-one correspondence between `FRAME` and `BLOCK` -
figure 1: no one-to-one correspondence between `FRAME` and `BLOCK`
+
figure 1: no one-to-one correspondence between FRAME and BLOCK
@@ -666,7 +666,7 @@ If we focus on there, we can see a link is always pushed in at the "next" to
figure 4: the structure of `ruby_dyna_vars` -
figure 4: the structure of `ruby_dyna_vars`
+
figure 4: the structure of ruby_dyna_vars
@@ -794,7 +794,7 @@ Therefore, if `0x10` did not exist, `state` would be the same value as `TAG_xxxx
figure 6: `block->tag->dst` -
figure 6: `block->tag->dst`
+
figure 6: block->tag->dst
diff --git a/load.md b/load.md index c9e1770..5350ab8 100644 --- a/load.md +++ b/load.md @@ -449,10 +449,10 @@ returned back to the thread where it was called. When the file name disappears from `loading_tbl`, the loading is finished so the function can end. The `curr_thread` check is not to lock itself (figure 1). -
-(loadwait)
-Figure 1: Serialisation of loads -
+
+ figure 1: Serialisation of loads +
figure 1: Serialisation of loads
+
Loading of Ruby programs ------------------------ @@ -808,13 +808,12 @@ entirely, and connecting them each other. In other words, pulling the lines from all of "the necessary names", each line must be connected to one of "the providing names" of a particular object file. (Figure. 2) -To put this in technical terms, -it is resolving undefined symbols. +To put this in technical terms, it is resolving undefined symbols. -
-(link)
-Figure 2: object files and linking -
+
+ figure 2: object files and linking +
figure 2: object files and linking
+
Logically this is how it is, but in reality a program can't run only because of this. At least, C programs cannot run without converting the names to the diff --git a/minimum.md b/minimum.md index e200b0a..4944f53 100644 --- a/minimum.md +++ b/minimum.md @@ -1116,7 +1116,7 @@ call `puts` and `p` also at the toplevel.
figure 10: `main`, `Object` and `Kernel` -
figure 10: `main`, `Object` and `Kernel`
+
figure 10: main, Object and Kernel
Thus `p` isn't a function, it's a method. Just because diff --git a/module.md b/module.md index 3eb464d..5a5ea86 100644 --- a/module.md +++ b/module.md @@ -43,11 +43,10 @@ consequence. This is why, even if a procedure is called anytime and any number of times, we only have to write its code once (Fig. 1). - -

-(stack)
-Fig.1: What is changing is only the stack -

+
+ figure 1: What is changing is only the stack +
figure 1: What is changing is only the stack
+
The execution of Ruby is also basically nothing but chained calls of methods @@ -141,11 +140,10 @@ general noun and `FRAME` when it means `struct FRAME`. First af all, since there's the `prev` member, you can infer that the stack is made of a linked list. (Fig.2) - -

-(framestack)
-Fig.2: `ruby_frame` -

+
+ figure 2: `ruby_frame` +
figure 2: ruby_frame
+
The fact that `ruby_xxxx` points to the top stack frame is common to all stacks @@ -390,13 +388,10 @@ The break between blocks are similar to the one of the parser, it is expressed by a `RVarmap` (header) whose `id` is `0`. Details are deferred again. It will be explained in Chapter 16: Blocks. - -

-(vars)
-Fig.3: `ruby_dyna_vars` -

- - +
+ figure 3: `ruby_dyna_vars` +
figure 3: ruby_dyna_vars
+
### `ruby_class` @@ -468,10 +463,10 @@ end Fig.4 shows how `ruby_cref` is when evaluating the code (A). -

-(crefstack)
-Fig.4: `ruby_cref` -

+
+ figure 4: `ruby_cref` +
figure 4: ruby_cref
+
However, illustrating this image everytime is tedious and its intention becomes unclear. @@ -866,11 +861,10 @@ machine stack. What differentiate slightly is that the spaces of the stack frames are allocated in the heap, the machine stack is used in order to create the stack structure (Fig.5.). - -

-(scopestack)
-Fig.5. The machine stack and the SCOPE Stack -

+
+ figure 5: The machine stack and the SCOPE Stack +
figure 5: The machine stack and the SCOPE Stack
+
Additionally, the flags like `SCOPE_` something repeatedly appearing in the @@ -923,11 +917,10 @@ Because they are confusing, it's a good thing writing some comments such as "This is the variable name", "this is the value". The one with `tbl` is for the names. - -

-(localvars)
-Fig.6. `ruby_scope->local_vars` -

+
+ figure 6: `ruby_scope->local_vars` +
figure 6: ruby_scope->local_vars
+
Where is this `node` used? @@ -1057,11 +1050,10 @@ However, the arguments of a method are obviously `VALUE` s and the GC could not find a `VALUE` if it is stored in the heap. Therefore, it is enforced that GC can find it through `NODE`. - -

-(tmpprotecttmp)
-Fig.7. anchor the space to the stack through `NODE` -

+
+ figure 7: anchor the space to the stack through `NODE` +
figure 7: anchor the space to the stack through NODE
+
On the contrary, in the environment with the true `alloca()`, we can naturally @@ -1667,14 +1659,10 @@ and insert into the just pushed `ruby_frame->cbase` (Fig.84) ... This is the mechanism. Complicated. - - -

-(cbase)
-Fig 8. CREF Trasfer -

- - +
+ figure 8: CREF Transfer +
figure 8: CREF Transfer
+
#### `ev_const_get()` @@ -2033,10 +2021,10 @@ So, what it is doing here is adjusting to either `list` and `val` which is shorter and doing one-to-one assignments. (Fig.9) -

-(massign)
-Fig.9. assign when corresponded -

+
+ figure 9: assign when corresponded +
figure 9: assign when corresponded
+
(B) if there are remainders on the right-hand side, turn them into a Ruby diff --git a/name.md b/name.md index ee33a54..ebddf53 100644 --- a/name.md +++ b/name.md @@ -160,7 +160,7 @@ and understand the roles.
figure 5: `st_table` data structure -
figure 5: `st_table` data structure
+
figure 5: st_table data structure
So, let us comment on `st_hash_type`. diff --git a/object.md b/object.md index 361ddda..88e1e70 100644 --- a/object.md +++ b/object.md @@ -37,7 +37,7 @@ the pointer type will always be `VALUE` (figure 1).
figure 1: `VALUE` and struct -
figure 1: `VALUE` and struct
+
figure 1: VALUE and struct
Here is the definition of `VALUE`: @@ -142,7 +142,7 @@ of the type of struct pointed to by `VALUE`.
figure 3: `struct RBasic` -
figure 3: `struct RBasic`
+
figure 3: struct RBasic
Because it is purposefully designed this way, @@ -228,7 +228,7 @@ because this is prepared for the time when you will be wondering about it later.
figure 5: Use of `flags` -
figure 5: Use of `flags`
+
figure 5: Use of flags
When looking at the diagram, it looks like that 21 bits are not used on 32 bit @@ -712,7 +712,7 @@ solved by using a global `st_table`, `generic_iv_table` (figure 7).
figure 7: `generic_iv_table` -
figure 7: `generic_iv_table`
+
figure 7: generic_iv_table
Let's see this in practice. @@ -1198,5 +1198,5 @@ after we'll finish chapter 5 "Garbage collection."
figure 8: Representation of `struct RData` -
figure 8: Representation of `struct RData`
+
figure 8: Representation of struct RData
diff --git a/parser.md b/parser.md index 0d36983..47d6af3 100644 --- a/parser.md +++ b/parser.md @@ -264,7 +264,7 @@ reduced during parsing.
figure 2: `expr` demotion -
figure 2: `expr` demotion
+
figure 2: expr demotion
The next rule is also particularly interesting. @@ -281,7 +281,7 @@ a `primary` with this rule. The next figure illustrates this rule in action.
figure 3: `program` demotion -
figure 3: `program` demotion
+
figure 3: program demotion
This means that for any syntax element in Ruby, if we surround it with diff --git a/syntree.md b/syntree.md index 27f9ff7..b11eb24 100644 --- a/syntree.md +++ b/syntree.md @@ -158,10 +158,10 @@ It won't be so much trouble if we'll keep focus on around `nd_type`. Fig.1 shows how it seems like. -

-(flagUsage)
-Fig.1: The usage of RNode.flags -

+
+ figure 1: The usage of `RNode.flags` +
figure 1: The usage of RNode.flags
+
And, since macros cannot be used from debuggers, @@ -247,10 +247,10 @@ when subtracting 1. We can eventually understand that `NODE_LMASK` is a sequence filled with 1 whose size is the number of the bits that are still available. -

-(lmask)
-Fig.2: `NODE_LMASK` -

+
+ figure 2: `NODE_LMASK` +
figure 2: NODE_LMASK
+
@@ -268,10 +268,10 @@ leaves only the unused space. Fig.3 shows how `flags` is used. Since `FL_USHIFT` is 11, in 32-bit machine 32-(11+8)=13 bits are available for the line number. -

-(flags)
-Fig.3: How `flags` are used at `NODE` -

+
+ figure 3: How `flags` are used at `NODE` +
figure 3: How flags are used at NODE
+
... This means, if the line numbers becomes beyond 2^13=8192, @@ -449,10 +449,10 @@ But since these explanation in text are probably not intuitive, I recommend you to also check Fig.4 at the same time. -

-(stree)
-Fig.4: Syntax Tree -

+
+ figure 4: Syntax Tree +
figure 4: Syntax Tree
+
I'll explain the meaning of each node. `NODE_CALL` is a Function CALL. @@ -921,10 +921,10 @@ We can see the list of `NODE_BLOCK` is created and `NODE_NEWLINE` are attached as headers. (Fig.5) -

-(blocklist)
-Fig.5: `NODE_BLOCK` and `NODE_NEWLINE` -

+
+ figure 5: `NODE_BLOCK` and `NODE_NEWLINE` +
figure 5: NODE_BLOCK and NODE_NEWLINE
+
It means, for each statement (`stmt`) `NODE_NEWLINE` is attached, @@ -1011,13 +1011,10 @@ because in this way we don't have to traverse all elements when adding an element to the tail (Fig.6). Conversely speaking, when you need to add elements later, `NODE_BLOCK` is suitable. - -

-(append)
-Fig.6: Appending is easy. -

- - +
+ figure 6: Appending is easy +
figure 6: Appending is easy
+
### The two types of lists @@ -1406,13 +1403,10 @@ However, I'd like to cover the whole picture at least, so I finish this section by just showing the call graph of the relevant functions (Fig.7). -

-(callgraph)
-Fig.7: the call graph of the value check functions -

- - - +
+ figure 7: the call graph of the value check functions +
figure 7: the call graph of the value check functions
+
@@ -1582,11 +1576,10 @@ Based on the observation we've done and the information that will be explained, Fig.8 illustrates the image of both structs while executing the parser. -

-(localvars)
-Fig.8: The image of local variable tables at runtime -

- +
+ figure 8: The image of local variable tables at runtime +
figure 8: The image of local variable tables at runtime
+
@@ -1666,12 +1659,10 @@ Based on the above observations, `local_push()` is at the beginning of the local `local_pop()` is used when ending the scope. This perfect scenario comes out. (Fig.9) - -

-(localtbl)
-Fig.9: the flow of the local variable management -

- +
+ figure 9: the flow of the local variable management +
figure 9: the flow of the local variable management
+
Then, let's look at the content of the function. @@ -1807,11 +1798,10 @@ What is the meaning of the return value of this function? `lvtbl->tbl` seems an array of the variables, so there're one-to-one correspondences between the variable names and "their index - 1 (`cnt-1`)". (Fig.10) - -

-(lvtbltbl)
-Fig.10: The correspondences between the variable names and the return values -

+
+ figure 10: The correspondences between the variable names and the return values +
figure 10: The correspondences between the variable names and the return values
+
Moreover, this return value is calculated so that the start point becomes 0, @@ -1904,13 +1894,10 @@ at `local_pop()`. In other words, it holds the number of the local variables. The index 1 or more hold the local variable names defined in the scope. Fig.11 shows the final appearance we expect. - -

-(tbl)
-Fig.11: correspondences between local variable names and the return values -

- - +
+ figure 11: correspondences between local variable names and the return values +
figure 11: correspondences between local variable names and the return values
+
@@ -2116,10 +2103,10 @@ iter { Fig.12 shows the `ruby_dyna_vars` in this situation. -

-(dynavars)
-Fig.12: `ruby_dyna_vars` when all scopes are finished to be parsed -

+
+ figure 12: `ruby_dyna_vars` when all scopes are finished to be parsed +
figure 12: ruby_dyna_vars when all scopes are finished to be parsed
+
This structure is fairly smart. diff --git a/thread.md b/thread.md index f6ca202..b32e6e5 100644 --- a/thread.md +++ b/thread.md @@ -169,12 +169,10 @@ the both ends are connected. It means, it is circular. This is a big point. Adding the static `main_thread` and `curr_thread` variables to it, the whole data structure would look like Figure 1. - -

-(thread)
-Figure 1: the data structures to manage threads -

- +
+ figure 1: the data structures to manage threads +
figure 1: the data structures to manage threads
+
`main_thread` (main thread) means the thread existed at the time when a program started, meaning the "first" thread. `curr_thread` is obviously `current thread`, @@ -822,10 +820,10 @@ FD_ISSET(fd, &set) /* true if fd is in the set */ and when we want to check n-th file descriptor, the n-th bit is set (Figure 2). -
-(fdset)
-Figure 2: fd_set -
+
+ figure 2: `fd_set` +
figure 2: fd_set
+
I'll show a simple usage example of `select`. @@ -1251,10 +1249,10 @@ this pair of `setjmp()` and `longjmp()` is not completed in this thread. In other words, there's a chain of `setjmp`/`longjmp()` as follows. (Figure 3) -
-(setjmploop)
-Figure 3: the backstitch by chaining of `setjmp` -
+
+ figure 3: the backstitch by chaining of `setjmp` +
figure 3: the backstitch by chaining of setjmp
+
We can restore around the CPU registers with `setjmp()`/`longjmp()`, @@ -1317,10 +1315,10 @@ it is because both a stack extending higher and a stack extending lower are possible. (Figure 4) -
-(twodirection)
-Fig.4: a stack extending above and a stack extending below -
+
+ figure 4: a stack extending above and a stack extending below +
figure 4: a stack extending above and a stack extending below
+
After that, the rest is allocating a memory in where `th->stkptr` points to and diff --git a/yacc.md b/yacc.md index 59c85b6..3c70abf 100644 --- a/yacc.md +++ b/yacc.md @@ -120,8 +120,8 @@ code. This file can then be compiled as usual (figure 1 shows the full process).
- figure 1: Figure 1: File dependencies -
figure 1: Figure 1: File dependencies
+ figure 1: File dependencies +
figure 1: File dependencies
The output file name is always `y.tab.c` and can't be changed. The @@ -855,10 +855,10 @@ I'd like you to check one by one. all of these variables that become the interfaces are of type `YYSTYPE`. -
-(yaccvars)
-Figure 2: Relationships among `yacc` related variables & functions -
+
+ figure 2: Relationships among `yacc` related variables & functions +
figure 2: Relationships among yacc related variables & functions
+
From 5a8beef6fc03452ffb6b1907ab6f28150869726b Mon Sep 17 00:00:00 2001 From: Ulysse Buonomo Date: Sun, 18 Apr 2021 21:43:24 +0200 Subject: [PATCH 12/14] lang formatting --- load.md | 59 ++++++------- method.md | 46 +++++----- minimum.md | 184 +++++++++++++++++++-------------------- module.md | 92 ++++++++++---------- name.md | 52 +++++------ object.md | 96 ++++++++++----------- parser.md | 97 ++++++++++----------- spec.md | 246 ++++++++++++++++++++++++++--------------------------- 8 files changed, 439 insertions(+), 433 deletions(-) diff --git a/load.md b/load.md index 5350ab8..06e408d 100644 --- a/load.md +++ b/load.md @@ -15,7 +15,7 @@ Outline At the Ruby level, there are two procedures that can be used for loading: `require` and `load`. -```TODO-lang +```ruby require 'uri' # load the uri library load '/home/foo/.myrc' # read a resource file ``` @@ -41,7 +41,7 @@ Ruby's load path is in the global variable `$:`, which contains an array of strings. For example, displaying the content of the `$:` in the environment I usually use would show: -```TODO-lang +``` % ruby -e 'puts $:' /usr/lib/ruby/site_ruby/1.7 /usr/lib/ruby/site_ruby/1.7/i686-linux @@ -62,7 +62,7 @@ In a Windows environment, there will also be a drive letter. Then, let's try to `require` the standard library `nkf.so` from the load path. -```TODO-lang +```ruby require 'nkf' ``` @@ -75,7 +75,7 @@ extension libraries, for example `.dll` in a Windows environment or Let's do a simulation on my environment. `ruby` checks the following paths in sequential order. -```TODO-lang +``` /usr/lib/ruby/site_ruby/1.7/nkf.rb /usr/lib/ruby/site_ruby/1.7/nkf.so /usr/lib/ruby/site_ruby/1.7/i686-linux/nkf.rb @@ -95,7 +95,7 @@ global variable `$"`. In our case the string `"nkf.so"` has been put there. Even if the extension has been omitted when calling `require`, the file name in `$"` has the extension. -```TODO-lang +```ruby require 'nkf' # after loading nkf... 
p $" # ["nkf.so"] the file is locked @@ -123,7 +123,7 @@ file in `$:`. But it can only load Ruby programs. Furthermore, the extension cannot be omitted: the complete file name must always be given. -```TODO-lang +```ruby load 'uri.rb' # load the URI library that is part of the standard library ``` @@ -147,7 +147,7 @@ programs are basically evaluated at the top-level. It means the defined constants will be top-level constants and the defined methods will be function-style methods. -```TODO-lang +```ruby ### mylib.rb MY_OBJECT = Object.new def my_p(obj) @@ -171,7 +171,7 @@ the `module` statement, it does not serve any purpose, as everything that is at the top-level of the loaded file is put at the Ruby top-level. -```TODO-lang +```ruby require 'mylib' # whatever the place you require from, be it at the top-level module SandBox require 'mylib' # or in a module, the result is the same @@ -213,7 +213,7 @@ bothersome so we will limit ourselves to the case when no file extension is given. ▼ `rb_f_require()` (simplified version) -```TODO-lang +```c 5527 VALUE 5528 rb_f_require(obj, fname) 5529 VALUE obj, fname; @@ -272,9 +272,10 @@ actually like subroutines, and the two variables `feature` and `fname` are more or less their parameters. These variables have the following meaning. -|_. variable|_. meaning|_. example| -|`feature`|the library file name that will be put in `$"`|`uri.rb`、`nkf.so`| -|`fname`|the full path to the library|`/usr/lib/ruby/1.7/uri.rb`| +| variable | meaning | example | +| --------- | ---------------------------------------------- | -------------------------- | +| `feature` | the library file name that will be put in `$"` | `uri.rb`、`nkf.so` | +| `fname` | the full path to the library | `/usr/lib/ruby/1.7/uri.rb` | The name `feature` can be found in the function `rb_feature_p()`. This function checks if a file has been locked (we will look at it just @@ -297,7 +298,7 @@ searches the file `path` in the global load path `$'` only look at the main part. 
▼ `rb_find_file()` (simplified version) -```TODO-lang +```c 2494 VALUE 2495 rb_find_file(path) 2496 VALUE path; @@ -339,7 +340,7 @@ only look at the main part. If we write what happens in Ruby we get the following: -```TODO-lang +```ruby tmp = [] # make an array $:.each do |path| # repeat on each element of the load path tmp.push path if path.length > 0 # check the path and push it @@ -374,7 +375,7 @@ code. Or more accurately, it is "up to just before the load". The code of `rb_f_require()`'s `load_rb` has been put below. ▼ `rb_f_require():load_rb` -```TODO-lang +```c 5625 load_rb: 5626 if (rb_feature_p(RSTRING(feature)->ptr, Qtrue)) 5627 return Qfalse; @@ -405,7 +406,7 @@ from one thread, and if during the loading another thread tries to load the same file, that thread will wait for the first loading to be finished. If it were not the case: -```TODO-lang +```ruby Thread.fork { require 'foo' # At the beginning of require, foo.rb is added to $" } # However the thread changes during the evaluation of foo.rb @@ -429,7 +430,7 @@ thread. That makes an exclusive lock. And in `rb_feature_p()`, we wait for the loading thread to end like the following. ▼ `rb_feature_p()` (second half) -```TODO-lang +```c 5477 rb_thread_t th; 5478 5479 while (st_lookup(loading_tbl, f, &th)) { @@ -463,7 +464,7 @@ We will now look at the loading process itself. Let's start by the part inside `rb_f_require()`'s `load_rb` loading Ruby programs. ▼ `rb_f_require()-load_rb-` loading -```TODO-lang +```c 5638 PUSH_TAG(PROT_NONE); 5639 if ((state = EXEC_TAG()) == 0) { 5640 rb_load(fname, 0); @@ -483,7 +484,7 @@ And the second argument `wrap` is folded with 0 because it is 0 in the above calling code. ▼ `rb_load()` (simplified edition) -```TODO-lang +```c void rb_load(fname, /* wrap=0 */) VALUE fname; @@ -584,7 +585,7 @@ all of them would be put in `eval.c` in the first place. Then, it is `rb_load_file()`. 
▼ `rb_load_file()` -```TODO-lang +```c 865 void 866 rb_load_file(fname) 867 char *fname; @@ -605,7 +606,7 @@ non essential things have already been removed.

▼ `load_file()` (simplified edition)

-```TODO-lang +```c static void load_file(fname, /* script=0 */) char *fname; @@ -645,7 +646,7 @@ result. That's all for the loading code. Finally, the calls were quite deep so the callgraph of `rb_f_require()` is shown bellow. -```TODO-lang +``` rb_f_require ....eval.c rb_find_file ....file.c dln_find_file ....dln.c @@ -679,7 +680,7 @@ If you're using Windows, probably your IDE will have a tracer built in. Well, as The output is done on `stderr` so it was redirected using `2>&1`. -```TODO-lang +``` % strace ruby -e 'require "rational"' 2>&1 | grep '^open' open("/etc/ld.so.preload", O_RDONLY) = -1 ENOENT open("/etc/ld.so.cache", O_RDONLY) = 3 @@ -707,7 +708,7 @@ start with `rb_f_require()`'s `load_dyna`. However, we do not need the part about locking anymore so it was removed. ▼ `rb_f_require()`-`load_dyna` -```TODO-lang +```c 5607 { 5608 int volatile old_vmode = scope_vmode; 5609 @@ -745,7 +746,7 @@ Since I'm using `gcc` on Linux, I can create a runnable program in the following manner. -```TODO-lang +``` % gcc hello.c ``` @@ -754,7 +755,7 @@ According to the file name, this is probably an "Hello, World!" program. In UNIX, `gcc` outputs a program into a file named `a.out` by default, so you can subsequently execute it in the following way: -```TODO-lang +``` % ./a.out Hello, World! ``` @@ -928,7 +929,7 @@ but its structure is simple because of some reasons. Take a look at the outline first. ▼ `dln_load()` (outline) -```TODO-lang +```c void* dln_load(file) const char *file; @@ -969,7 +970,7 @@ Supported APIs are as follows: First, let's start with the API code for the `dlopen` series. ▼ `dln_load()`-`dlopen()` -```TODO-lang +```c 1254 void* 1255 dln_load(file) 1256 const char *file; @@ -1049,7 +1050,7 @@ As for Win32, `LoadLibrary()` and `GetProcAddress()` are used. It is very general Win32 API which also appears on MSDN. 
▼ `dln_load()`-Win32 -```TODO-lang +```c 1254 void* 1255 dln_load(file) 1256 const char *file; diff --git a/method.md b/method.md index ad7b0ea..f0ebc76 100644 --- a/method.md +++ b/method.md @@ -22,7 +22,7 @@ confusing, let's strictly define terms here: -```TODO-lang +```ruby m(a) # a is a "normal argument" m(*list) # list is an "array argument" m(&block) # block is a "block argument" @@ -49,14 +49,14 @@ parameters" will be discussed in the next chapter.

▼The Source Program

-```TODO-lang +```ruby obj.method(7,8) ```

▼Its Syntax Tree

-```TODO-lang +``` NODE_CALL nd_mid = 9049 (method) nd_recv: @@ -92,7 +92,7 @@ Now, let's look at the handler of `NODE_CALL` in `rb_eval()`.

▼ `rb_eval()` − `NODE_CALL`

-```TODO-lang +```c 2745 case NODE_CALL: 2746 { 2747 VALUE recv; @@ -136,7 +136,7 @@ Therefore, something like the following is a boilerplate: -```TODO-lang +```c int argc; VALUE *argv; /* used in SETUP_ARGS */ TMP_PROTECT; @@ -152,7 +152,7 @@ Let's look at it:

▼ `SETUP_ARGS()`

-```TODO-lang +```c 1780 #define SETUP_ARGS(anode) do {\ 1781 NODE *n = anode;\ 1782 if (!n) {\ no arguments @@ -207,7 +207,7 @@ If I write in the code (and tidy up a little), it becomes as follows. -```TODO-lang +```c /***** else if clause、argc!=0 *****/ int i; n = anode; @@ -266,7 +266,7 @@ of them.

▼ `rb_call()` (simplified)

-```TODO-lang +```c static VALUE rb_call(klass, recv, mid, argc, argv, scope) VALUE klass, recv; @@ -319,7 +319,7 @@ What is looking up the cache is the first half of `rb_call()`. Only with -```TODO-lang +```c ent = cache + EXPR1(klass, mid); ``` @@ -349,7 +349,7 @@ Next, let's examine the structure of the method cache in detail.

▼Method Cache

-```TODO-lang +```c 180 #define CACHE_SIZE 0x800 181 #define CACHE_MASK 0x7ff 182 #define EXPR1(c,m) ((((c)>>3)^(m))&CACHE_MASK) @@ -431,7 +431,7 @@ look at it by dividing into small portions. Starting with the outline:

▼ `rb_call0()` (Outline)

-```TODO-lang +```c 4482 static VALUE 4483 rb_call0(klass, recv, id, oid, argc, argv, body, nosuper) 4484 VALUE klass, recv; @@ -521,7 +521,7 @@ could be ignored. The important things are only `NODE_CFUNC`, `NODE_SCOPE` and

▼ `PUSH_FRAME() POP_FRAME()`

-```TODO-lang +```c 536 #define PUSH_FRAME() do { \ 537 struct FRAME _frame; \ 538 _frame.prev = ruby_frame; \ @@ -564,7 +564,7 @@ following line:

▼ `rb_call0()` − `NODE_CFUNC` (simplified)

-```TODO-lang +```c case NODE_CFUNC: result = call_cfunc(body->nd_cfnc, recv, len, argc, argv); break; @@ -576,7 +576,7 @@ Then, as for `call_cfunc()` ...

▼ `call_cfunc()` (simplified)

-```TODO-lang +```c 4394 static VALUE 4395 call_cfunc(func, recv, len, argc, argv) 4396 VALUE (*func)(); @@ -639,7 +639,7 @@ This part forms the foundation of Ruby.

▼ `rb_call0()` − `NODE_SCOPE` (outline)

-```TODO-lang +```c 4568 case NODE_SCOPE: 4569 { 4570 int state; @@ -747,7 +747,7 @@ But before that, I'd like you to first check the syntax tree of the method again -```TODO-lang +``` % ruby -rnodedump -e 'def m(a) nil end' NODE_SCOPE nd_rval = (null) @@ -793,7 +793,7 @@ For example, if you write a definition as below, -```TODO-lang +```ruby def m(a, b, c = nil, *rest) lvar1 = nil end @@ -804,7 +804,7 @@ local variable IDs are assigned as follows. -```TODO-lang +``` 0 1 2 3 4 5 6 $_ $~ a b c rest lvar1 ``` @@ -816,7 +816,7 @@ Taking this into considerations, let's look at the code.

▼ `rb_call0()` − `NODE_SCOPE` −assignments of arguments

-```TODO-lang +```c 4601 if (nd_type(body) == NODE_ARGS) { /* no body */ 4602 node = body; /* NODE_ARGS */ 4603 body = 0; /* the method body */ @@ -904,7 +904,7 @@ It means the following form: -```TODO-lang +```ruby super ``` @@ -919,7 +919,7 @@ If there's not, the one after option parameters are assigned seems better. -```TODO-lang +```ruby def m(a, b, *rest) super # probably 5, 6, 7, 8 should be passed end @@ -956,7 +956,7 @@ and `NODE_ZSUPER` is `super` without arguments.

▼ `rb_eval()` − `NODE_SUPER`

-```TODO-lang +```c 2780 case NODE_SUPER: 2781 case NODE_ZSUPER: 2782 { @@ -1044,7 +1044,7 @@ What happens if `String.new` is replaced by new definition and `super` is called -```TODO-lang +```ruby def String.new super end diff --git a/minimum.md b/minimum.md index 4944f53..7a02db5 100644 --- a/minimum.md +++ b/minimum.md @@ -32,7 +32,7 @@ Everything that can be manipulated in a Ruby program is an object. There are no primitives as Java's `int` and `long`. For instance if we write as below it denotes a string object with content `content`. -```TODO-lang +```ruby "content" ``` @@ -40,7 +40,7 @@ I casually called it a string object but to be precise this is an expression wh a string object. Therefore if we write it several times each time another string object is generated. -```TODO-lang +```ruby "content" "content" "content" @@ -51,7 +51,7 @@ Here three string objects with content `content` are generated. By the way, objects just existing there can't be seen by programmers. Let's show how to print them on the terminal. -```TODO-lang +```ruby p("content") # Shows "content" ``` @@ -71,7 +71,7 @@ Now, let's explain some more the expressions which directly generate objects, the so-called literals. First the integers and floating point numbers. -```TODO-lang +```ruby # Integer 1 2 @@ -89,7 +89,7 @@ I'm repeating myself but there are no primitives in Ruby. Below an array object is generated. -```TODO-lang +```ruby [1, 2, 3] ``` @@ -97,20 +97,20 @@ This program generates an array which consists of the three integers 1, 2 and 3 in that order. As the elements of an array can be arbitrary objects the following is also possible. -```TODO-lang +```ruby [1, "string", 2, ["nested", "array"]] ``` And finally, a hash table is generated by the expression below. -```TODO-lang +```ruby {"key"=>"value", "key2"=>"value2", "key3"=>"value3"} ``` A hash table is a structure which expresses one-to-one relationships between arbitrary objects. 
The above line creates a table which stores the following relationships. -```TODO-lang +``` "key" → "value" "key2" → "value2" "key3" → "value3" @@ -125,7 +125,7 @@ We can call methods on an object. In C++ Jargon they are member functions. I don't think it's necessary to explain what a method is. I'll just explain the notation. -```TODO-lang +```ruby "content".upcase() ``` @@ -134,13 +134,13 @@ As `upcase` is a method which returns a new string with the small letters replaced by capital letters, we get the following result. -```TODO-lang +```ruby p("content".upcase()) # Shows "CONTENT" ``` Method calls can be chained. -```TODO-lang +```ruby "content".upcase().downcase() ``` @@ -156,21 +156,21 @@ The Program In Ruby we can just write expressions and it becomes a program. One doesn't need to define a `main()` as in C++ or Java. -```TODO-lang +```ruby p("content") ``` This is a complete Ruby program. If we put this into a file called `first.rb` we can execute it from the command line as follows. -```TODO-lang +``` % ruby first.rb "content" ``` With the `-e` option of the `ruby` program we don't even need to create a file. -```TODO-lang +``` % ruby -e 'p("content")' "content" ``` @@ -183,7 +183,7 @@ Having top-level is a characteristic trait of Ruby as a scripting language. In Ruby, one line is usually one statement. A semicolon at the end isn't necessary. Therefore the program below is interpreted as three statements. -```TODO-lang +```ruby p("content") p("content".upcase()) p("CONTENT".downcase()) @@ -191,7 +191,7 @@ p("CONTENT".downcase()) When we execute it it looks like this. -```TODO-lang +``` % ruby second.rb "content" "CONTENT" @@ -210,7 +210,7 @@ by the beginning of the name. Local variables start with a small letter or an underscore. One can write assignments by using "`=`". -```TODO-lang +```ruby str = "content" arr = [1,2,3] ``` @@ -220,7 +220,7 @@ not necessary. Because variables don't have types, we can assign any kind of objects indiscriminately. 
The program below is completely legal. -```TODO-lang +```ruby lvar = "content" lvar = [1,2,3] lvar = 1 @@ -233,14 +233,14 @@ The above was just an example for the sake of it. Variable reference has also a pretty sensible notation. -```TODO-lang +```ruby str = "content" p(str) # Shows "content" ``` In addition let's check the point that a variable hold a reference by taking an example. -```TODO-lang +```ruby a = "content" b = a c = b @@ -266,7 +266,7 @@ Let's say for now that the top level is one local scope. Constants start with a capital letter. They can only be assigned once (at their creation). -```TODO-lang +```ruby Const = "content" PI = 3.1415926535 @@ -283,7 +283,7 @@ Therefore, it is allowed due to practical requirements and there's no other choi but essentially there should be an error. In fact, up until version 1.1 there really was an error. -```TODO-lang +```ruby C = 1 C = 2 # There is a warning but ideally there should be an error. ``` @@ -311,7 +311,7 @@ Since Ruby has a wide abundance of control structures, just lining up them can be a huge task. For now, I just mention that there are `if` and `while`. -```TODO-lang +```ruby if i < 10 then # body end @@ -352,7 +352,7 @@ And on this `String` class the methods `upcase`, `downcase`, `strip` and many others are defined. So it looks as if each string object can respond to all these methods. -```TODO-lang +```ruby # They all belong to the String class, # hence the same methods are defined "content".upcase() @@ -369,7 +369,7 @@ In a static language a compiler error occurs but in Ruby there is a runtime exception. Let's try it out. For this kind of programs the `-e` option is handy. -```TODO-lang +``` % ruby -e '"str".bad_method()' -e:1: undefined method `bad_method' for "str":String (NoMethodError) ``` @@ -390,7 +390,7 @@ Up to now we talked about already defined classes. We can of course also define our own classes. To define classes we use the `class` statement. 
-```TODO-lang +```ruby class C end ``` @@ -398,7 +398,7 @@ end This is the definition of a new class `C`. After we defined it we can use it as follows. -```TODO-lang +```ruby class C end c = C.new() # create an instance of C and assign it to the variable c @@ -424,7 +424,7 @@ on this object ( usually new). If we look at the example below, it's pretty obvious that the creation of an instance doesn't differ from a normal method call. -```TODO-lang +```ruby S = "content" class C end @@ -437,7 +437,7 @@ So `new` is not a reserved word in Ruby. And we can also use `p` for an instance of a class even immediately after its creation. -```TODO-lang +```ruby class C end @@ -454,7 +454,7 @@ Oh, I completely forgot to mention about the notation of method names: So `Object#new` and `Object.new` are completely different things, we have to separate them strictly. -```TODO-lang +```ruby obj = Object.new() # Object.new obj.new() # Object#new ``` @@ -469,7 +469,7 @@ Even if we can define classes, it is useless if we cannot define methods. Let's define a method for our class `C`. -```TODO-lang +```ruby class C def myupcase( str ) return str.upcase() @@ -485,7 +485,7 @@ And we can use any number of parameters. Let's use the defined method. Methods are usually called from the outside by default. -```TODO-lang +```ruby c = C.new() result = c.myupcase("content") p(result) # Shows "CONTENT" @@ -494,7 +494,7 @@ p(result) # Shows "CONTENT" Of course if you get used to it you don't need to assign every time. The line below gives the same result. -```TODO-lang +```ruby p(C.new().myupcase("content")) # Also shows "CONTENT" ``` @@ -505,7 +505,7 @@ who is itself (the instance on which the method was called) is always saved and can be picked up in `self`. Like the `this` in C++ or Java. Let's check this out. -```TODO-lang +```ruby class C def get_self() return self @@ -523,7 +523,7 @@ We could confirm that `self` is `c` during the method call on `c`. 
Then what is the way to call a method on itself? What first comes to mind is calling via `self`. -```TODO-lang +```ruby class C def my_p( obj ) self.real_my_p(obj) # called a method against oneself @@ -541,7 +541,7 @@ But always adding the `self` when calling an own method is tedious. Hence, it is designed so that one can omit the called method (the receiver) whenever one calls a method on `self`. -```TODO-lang +```ruby class C def my_p( obj ) real_my_p(obj) # You can call without specifying the receiver @@ -567,7 +567,7 @@ In the fashion of Ruby's variable naming convention, the variable type can be determined by the first a few characters. For instance variables it's an `@`. -```TODO-lang +```ruby class C def set_i(value) @i = value @@ -587,7 +587,7 @@ Instance variables differ a bit from the variables seen before: We can reference them without assigning (defining) them. To see what happens we add the following lines to the code above. -```TODO-lang +```ruby c = C.new() p(c.get_i()) # Shows nil ``` @@ -599,7 +599,7 @@ but that's just the way it is. We can use `nil` like a literal as well. -```TODO-lang +```ruby p(nil) # Shows nil ``` @@ -612,7 +612,7 @@ In this case we don't change the `new` method, we define the `initialize` method. When we do this, it gets called within `new`. -```TODO-lang +```ruby class C def initialize() @i = "ok" @@ -648,7 +648,7 @@ Anyway let's try it out. Let our created class inherit from another class. To inherit from another class ( or designate a superclass) write the following. -```TODO-lang +```ruby class C < SuperClassName end ``` @@ -661,7 +661,7 @@ Handing over means that the methods which were defined in the superclass also work in the subclass as if they were defined in there once more. Let's check it out. -```TODO-lang +```ruby class C def hello() return "hello" @@ -679,7 +679,7 @@ p(sub.hello()) # Shows "hello" the class `Sub` as well. Of course we don't need to assign variables. The above is the same as the line below. 
-```TODO-lang +```ruby p(Sub.new().hello()) ``` @@ -688,7 +688,7 @@ In C++ and Object Pascal (Delphi) it's only possible to overwrite functions explicitly defined with the keyword `virtual` but in Ruby every method can be overwritten unconditionally. -```TODO-lang +```ruby class C def hello() return "Hello" @@ -744,7 +744,7 @@ become defined. Then, since the namespace of instance variables is completely flat based on each instance, it can be accessed by a method of whichever class. -```TODO-lang +```ruby class A def initialize() # called from when processing new() @i = "ok" @@ -783,7 +783,7 @@ In short, modules are classes for which a superclass cannot be designated and instances cannot be created. For the definition we write as follows. -```TODO-lang +```ruby module M end ``` @@ -791,7 +791,7 @@ end Here the module `M` was defined. Methods are defined exactly the same way as for classes. -```TODO-lang +```ruby module M def myupcase( str ) return str.upcase() @@ -803,7 +803,7 @@ But because we cannot create instances, we cannot call them directly. To do that, we use the module by "including" it into other classes. Then we become to be able to deal with it as if a class inherited the module. -```TODO-lang +```ruby module M def myupcase( str ) return str.upcase() @@ -826,7 +826,7 @@ There's no limit on defining methods or accessing instance variables. I said we cannot specify any superclass of a module, but other modules can be included. -```TODO-lang +```ruby module M end @@ -841,7 +841,7 @@ above modules. The example below also contains the inheritance of methods. -```TODO-lang +```ruby module OneMore def method_OneMore() p("OneMore") @@ -875,7 +875,7 @@ Besides, the class `C` also has a superclass. How is its relationship to modules? For instance, let's think of the following case. -```TODO-lang +```ruby # modcls.rb class Cls @@ -902,7 +902,7 @@ Which will be shown in this case, `"class"` or `"module"`? In other words, which one is "closer", class or module? 
We'd better ask Ruby about Ruby, thus let's execute it: -```TODO-lang +``` % ruby modcls.rb "module" ``` @@ -940,19 +940,19 @@ The explanation will also be relatively attentive. First a repetition of constants. As a constant begins with a capital letter the definition goes as follows. -```TODO-lang +```ruby Const = 3 ``` Now we reference the constant in this way. -```TODO-lang +```ruby p(Const) # Shows 3 ``` Actually we can also write this. -```TODO-lang +```ruby p(::Const) # Shows 3 in the same way. ``` @@ -968,7 +968,7 @@ However mentioning both is cumbersome, so I'll just subsume them under class definition. When one enters a class definition the level for constants rises ( as if entering a directory). -```TODO-lang +```ruby class SomeClass Const = 3 end @@ -986,7 +986,7 @@ As we can create a directory in a directory, we can create a class inside a class. For instance like this: -```TODO-lang +```ruby class C # ::C class C2 # ::C::C2 class C3 # ::C::C2::C3 @@ -1000,7 +1000,7 @@ should we always write its full name? Of course not. As with the filesystem, if one is inside the same class definition one can skip the `::`. It becomes like that: -```TODO-lang +```ruby class SomeClass Const = 3 p(Const) # Shows 3. @@ -1017,7 +1017,7 @@ Let's add that we can of course also view a constant inside a method. The reference rules are the same as within the class definition (outside the method). -```TODO-lang +```ruby class C Const = "ok" def test() @@ -1038,7 +1038,7 @@ and almost all the rest is executed in the apparent order. Look for instance at the following code. I used various constructions which have been used before. -```TODO-lang +```ruby 1: p("first") 2: 3: class C < Object @@ -1056,15 +1056,17 @@ I used various constructions which have been used before. 
This program is executed in the following order: -|`1: p("first")`|Shows `"first"`| -|`3: < Object`|The constant `Object` is referenced and the class object `Object` is gained| -|`3: class C`|A new class object with superclass `Object` is generated, and assigned to the constant C| -|`4: Const = "in C"`|Assigning the value `"in C"` to the constant `::C::Const`| -|`6: p(Const)`|Showing the constant `::C::Const` hence `"in C"`| -|`8: def myupcase(...)...end`|Define `C#myupcase`| -|`13: C.new().myupcase(...)`|Refer the constant `C`, call the method `new` on it, and then `myupcase` on the return value| -|`9: return str.upcase()`|Returns `"CONTENT"`| -|`13: p(...)`|Shows `"CONTENT"`| +| line | description | +| ---------------------------- | -------------------------------------------------------------------------------------------- | +| `1: p("first")` | Shows `"first"` | +| `3: < Object` | The constant `Object` is referenced and the class object `Object` is gained | +| `3: class C` | A new class object with superclass `Object` is generated, and assigned to the constant C | +| `4: Const = "in C"` | Assigning the value `"in C"` to the constant `::C::Const` | +| `6: p(Const)` | Showing the constant `::C::Const` hence `"in C"` | +| `8: def myupcase(...)...end` | Define `C#myupcase` | +| `13: C.new().myupcase(...)` | Refer the constant `C`, call the method `new` on it, and then `myupcase` on the return value | +| `9: return str.upcase()` | Returns `"CONTENT"` | +| `13: p(...)` | Shows `"CONTENT"` | ### The Scope of Local Variables @@ -1075,7 +1077,7 @@ have each completely independent local variable scope. In other words, the `lvar` variables in the following program are all different variables, and they do not influence each other. -```TODO-lang +```ruby lvar = 'toplevel' class C @@ -1134,7 +1136,7 @@ Well, since `self` is setup everywhere, The `self` in the class definition is the class itself (the class object). Hence it would look like this. 
-```TODO-lang +```ruby class C p(self) # C end @@ -1143,7 +1145,7 @@ end What should this be good for? In fact, we've already seen an example in which it is very useful. This one. -```TODO-lang +```ruby module M end class C @@ -1162,7 +1164,7 @@ because we have not finished the talk about class definition statement. In Ruby the loading of libraries also happens at runtime. Normally one writes this. -```TODO-lang +```ruby require("library_name") ``` @@ -1174,7 +1176,7 @@ As there is no concept like Java packages in Ruby, when we'd like to separate namespaces, it is done by putting files into a directory. -```TODO-lang +```ruby require("somelib/file1") require("somelib/file2") ``` @@ -1185,7 +1187,7 @@ distinction of files, so one can see classes defined in another file without any special preparation. To partition the namespace of class names one has to explicitly nest modules as shown below. -```TODO-lang +```ruby # example of the namespace partition of net library module Net class SMTP @@ -1211,7 +1213,7 @@ the scope of constants, but I want you to completely forget that. There is more about constants. Firstly one can also see constants in the "outer" class. -```TODO-lang +```ruby Const = "ok" class C p(Const) # Shows "ok" @@ -1222,7 +1224,7 @@ The reason why this is designed in this way is because this becomes useful when modules are used as namespaces. Let's explain this by adding a few things to the previous example of `net` library. -```TODO-lang +```ruby module Net class SMTP # Uses Net::SMTPHelper in the methods @@ -1240,7 +1242,7 @@ The outer class can be referenced no matter how many times it is nesting. When the same name is defined on different levels, the one which will first be found from within will be referred to. -```TODO-lang +```ruby Const = "far" class C Const = "near" # This one is closer than the one above @@ -1256,7 +1258,7 @@ There's another way of searching constants. 
If the toplevel is reached when going further and further outside then the own superclass is searched for the constant. -```TODO-lang +```ruby class A Const = "ok" end @@ -1271,7 +1273,7 @@ Let's summarize. When looking up a constant, first the outer classes is searched then the superclasses. This is quite contrived, but let's assume a class hierarchy as follows. -```TODO-lang +```ruby class A1 end class A2 < A1 @@ -1324,7 +1326,7 @@ In this kind of situation, in Ruby, we can check in practice. It's because there's "a method which returns the class (class object) to which an object itself belongs", `Object#class`. -```TODO-lang +```ruby p("string".class()) # String is shown p(String.class()) # Class is shown p(Object.class()) # Class is shown @@ -1333,7 +1335,7 @@ p(Object.class()) # Class is shown Apparently `String` belongs to the class named `Class`. Then what's the class of `Class`? -```TODO-lang +```ruby p(Class.class()) # Class is shown ``` @@ -1357,7 +1359,7 @@ Let's change the target and think about modules. As modules are also objects, there also should be a class for them. Let's see. -```TODO-lang +```ruby module M end p(M.class()) # Module is shown @@ -1366,7 +1368,7 @@ p(M.class()) # Module is shown The class of a module seems to be `Module`. And what should be the class of the class `Module`? -```TODO-lang +```ruby p(Module.class()) # Class ``` @@ -1376,7 +1378,7 @@ Now we change the direction and examine the inheritance relationships. What's the superclass of `Class` and `Module`? In Ruby, we can find it out with `Class#superclass`. -```TODO-lang +```ruby p(Class.superclass()) # Module p(Module.superclass()) # Object p(Object.superclass()) # nil @@ -1416,7 +1418,7 @@ Actually In Ruby there's also a means to define methods for individual objects ( not depending on the class. To do this, you can write this way. 
-```TODO-lang +```ruby obj = Object.new() def obj.my_first() puts("My first singleton method") @@ -1444,7 +1446,7 @@ deletes a file entry from the filesystem. In Ruby it can be used directly as the singleton method `unlink` of the `File` class. Let's try it out. -```TODO-lang +```ruby File.unlink("core") # deletes the coredump ``` @@ -1466,7 +1468,7 @@ As with constants, they belong to a class, and they can be referenced and assigned from both the class and its instances. Let's look at an example. The beginning of the name is `@@`. -```TODO-lang +```ruby class C @@cvar = "ok" p(@@cvar) # "ok" is shown @@ -1484,7 +1486,7 @@ before an assignment like the one shown below leads to a runtime error. There is an ´@´ in front but the behavior differs completely from instance variables. -```TODO-lang +``` % ruby -e ' class C @@cvar @@ -1500,7 +1502,7 @@ Class variables are inherited. Or saying it differently, a variable in a superior class can be assigned and referenced in the inferior class. -```TODO-lang +```ruby class A @@cvar = "ok" end @@ -1521,7 +1523,7 @@ Global Variables At last there are also global variables. They can be referenced from everywhere and assigned everywhere. The first letter of the name is a `$`. -```TODO-lang +```ruby $gvar = "global variable" p($gvar) # Shows "global variable" ``` diff --git a/module.md b/module.md index 5a5ea86..500bac3 100644 --- a/module.md +++ b/module.md @@ -114,7 +114,7 @@ general noun and `FRAME` when it means `struct FRAME`.

▼ `ruby_frame`

-```TODO-lang +```c 16 extern struct FRAME { 17 VALUE self; /* self */ 18 int argc; /* the argument count */ @@ -179,7 +179,7 @@ For instance, -```TODO-lang +```ruby class C def orig() end alias ali orig @@ -206,7 +206,7 @@ I'll call this frame `SCOPE`.

▼ `ruby_scope`

-```TODO-lang +```c 36 extern struct SCOPE { 37 struct RBasic super; 38 ID *local_tbl; /* an array of the local variable names */ @@ -229,7 +229,7 @@ like this: -```TODO-lang +```ruby def make_counter lvar = 0 return Proc.new { lvar += 1 } @@ -270,7 +270,7 @@ This frame will also be briefly written as `BLOCK` as in the same manner as

▼ `ruby_block`

-```TODO-lang +```c 580 static struct BLOCK *ruby_block; 559 struct BLOCK { @@ -329,7 +329,7 @@ But for consistency I'll call it `ITER`.

▼ `ruby_iter`

-```TODO-lang +```c 767 static struct iter *ruby_iter; 763 struct iter { @@ -368,7 +368,7 @@ already seen in Part 2. From now on, I'll call it just `VARS`.

▼ `struct RVarmap`

-```TODO-lang +```c 52 struct RVarmap { 53 struct RBasic super; 54 ID id; /* the name of the variable */ @@ -425,7 +425,7 @@ Its struct is ...

▼ `ruby_cref`

-```TODO-lang +```c 847 static NODE *ruby_cref = 0; (eval.c) @@ -450,7 +450,7 @@ explain the actual appearance. -```TODO-lang +```ruby class A class B class C @@ -474,7 +474,7 @@ Therefore, the same state as Fig.4 will be expressed in the following notation: -```TODO-lang +``` A ← B ← C ``` @@ -543,7 +543,7 @@ Now, let's start to look at the codes.

▼The Source Program

-```TODO-lang +```ruby module M a = 1 end @@ -552,7 +552,7 @@ end

▼Its Syntax Tree

-```TODO-lang +``` NODE_MODULE nd_cname = 9621 (M) nd_body: @@ -588,7 +588,7 @@ it has already became unnecessary to show the original code.

▼ `rb_eval()` − `NODE_MODULE` (simplified)

-```TODO-lang +```c case NODE_MODULE: { VALUE module; @@ -623,7 +623,7 @@ we can do "additional" definitions on the same one module any number of times. -```TODO-lang +```ruby module M def a # M#a is deifned end @@ -665,7 +665,7 @@ large amounts.

▼ `module_setup()`

-```TODO-lang +```c 3424 static VALUE 3425 module_setup(module, n) 3426 VALUE module; @@ -763,7 +763,7 @@ Consequently, it could be summarized as follows:

▼ `module_setup` (simplified)

-```TODO-lang +```c static VALUE module_setup(module, node) VALUE module; @@ -822,7 +822,7 @@ Let's examine the contents of these macros and what is done.

▼ `PUSH_SCOPE() POP_SCOPE()`

-```TODO-lang +```c 852 #define PUSH_SCOPE() do { \ 853 volatile int _vmode = scope_vmode; \ 854 struct SCOPE * volatile _old; \ @@ -887,7 +887,7 @@ following part of `module_setup` prepares the array.

▼The preparation of the local variable slots

-```TODO-lang +```c 3444 if (node->nd_tbl) { 3445 VALUE *vars = TMP_ALLOC(node->nd_tbl[0]+1); 3446 *vars++ = (VALUE)node; @@ -931,7 +931,7 @@ access in `gc.c`.

▼ `rb_gc_mark_children()` — `T_SCOPE`

-```TODO-lang +```c 815 case T_SCOPE: 816 if (obj->as.scope.local_vars && (obj->as.scope.flags & SCOPE_MALLOC)) { @@ -962,7 +962,7 @@ line of the next line:

▼ `ruby_scope->local_tbl`

-```TODO-lang +```c 3449 ruby_scope->local_tbl = node->nd_tbl; (eval.c) @@ -1011,7 +1011,7 @@ beginning of `module_setup()`. Its typical usage is this: -```TODO-lang +```c VALUE *ptr; TMP_PROTECT; @@ -1025,7 +1025,7 @@ is that ... Let's see its definition.

▼ `TMP_ALLOC()`

-```TODO-lang +```c 1769 #ifdef C_ALLOCA 1770 # define TMP_PROTECT NODE * volatile tmp__protect_tmp=0 1771 # define TMP_ALLOC(n) \ @@ -1081,7 +1081,7 @@ Here is the code for it: -```TODO-lang +```c PUSH_CLASS(); ruby_class = module; : @@ -1096,7 +1096,7 @@ We can understand it unexpectedly easily by looking at the definition.

▼ `PUSH_CLASS() POP_CLASS()`

-```TODO-lang +```c 841 #define PUSH_CLASS() do { \ 842 VALUE _class = ruby_class @@ -1128,7 +1128,7 @@ In `module_setup()`, it is pushed as follows: -```TODO-lang +```c PUSH_CREF(module); ruby_frame->cbase = (VALUE)ruby_cref; : @@ -1143,7 +1143,7 @@ Let's also see the definitions of `PUSH_CREF()` and `POP_CREF()`.

▼ `PUSH_CREF() POP_CREF()`

-```TODO-lang +```c 849 #define PUSH_CREF(c) \ ruby_cref = rb_node_newnode(NODE_CREF,(c),0,ruby_cref) 850 #define POP_CREF() ruby_cref = ruby_cref->nd_next @@ -1246,7 +1246,7 @@ As the next topic of the module definitions, let's look at the method definition

▼The Source Program

-```TODO-lang +```ruby def m(a, b, c) nil end @@ -1255,7 +1255,7 @@ end

▼Its Syntax Tree

-```TODO-lang +``` NODE_DEFN nd_mid = 9617 (m) nd_noex = 2 (NOEX_PRIVATE) @@ -1290,7 +1290,7 @@ indirectly call `rb_raise() rb_warn() rb_warning()`.

▼ `rb_eval()` − `NODE_DEFN` (simplified)

-```TODO-lang +```c NODE *defn; int noex; @@ -1343,7 +1343,7 @@ part is the next two lines. -```TODO-lang +```c defn = copy_node_scope(node->nd_defn, ruby_cref); rb_add_method(ruby_class, node->nd_mid, defn, noex); ``` @@ -1371,7 +1371,7 @@ the usages at these two places are almost the same.

▼ `copy_node_scope()`

-```TODO-lang +```c 1752 static NODE* 1753 copy_node_scope(node, rval) 1754 NODE *node; @@ -1421,7 +1421,7 @@ The next thing is `rb_add_method()` that is the function to register a method en

▼ `rb_add_method()`

-```TODO-lang +```c 237 void 238 rb_add_method(klass, mid, node, noex) 239 VALUE klass; @@ -1457,7 +1457,7 @@ I prepared `nodedump-method` for this kind of purposes. -```TODO-lang +``` % ruby -e ' class C def m(a) @@ -1542,7 +1542,7 @@ follows:

▼ `rb_eval()` − `NODE_LVAR`

-```TODO-lang +```c 2975 case NODE_LVAR: 2976 if (ruby_scope->local_vars == 0) { 2977 rb_bug("unexpected local variable"); @@ -1580,7 +1580,7 @@ Take a look at the following code: -```TODO-lang +```ruby class A C = 5 def A.new @@ -1623,7 +1623,7 @@ The ordinary constant references to which `::` is not attached, become

▼ `rb_eval()` − `NODE_CONST`

-```TODO-lang +```c 2994 case NODE_CONST: 2995 result = ev_const_get(RNODE(ruby_frame->cbase), node->nd_vid, self); 2996 break; @@ -1673,7 +1673,7 @@ Since only `ev_const_get()` is left, we'll look at it.

▼ `ev_const_get()`

-```TODO-lang +```c 1550 static VALUE 1551 ev_const_get(cref, id, self) 1552 NODE *cref; @@ -1724,7 +1724,7 @@ Let's look at it.

▼ `cvar_cbase()`

-```TODO-lang +```c 1571 static VALUE 1572 cvar_cbase() 1573 { @@ -1749,7 +1749,7 @@ seems. This feature is added to counter the following kind of code: -```TODO-lang +```ruby class C class C @@cvar = 1 @@cvar = 1 class << C def C.m @@ -1824,14 +1824,14 @@ First, following the standard, let's start with the syntax tree.

▼The Source Program

-```TODO-lang +```ruby a, b = 7, 8 ```

▼Its Syntax Tree

-```TODO-lang +``` NODE_MASGN nd_head: NODE_ARRAY [ @@ -1865,7 +1865,7 @@ value EXPAND". We are curious about what this node is doing. Let's see.

▼ `rb_eval()` − `NODE_REXPAND`

-```TODO-lang +```c 2575 case NODE_REXPAND: 2576 result = avalue_to_svalue(rb_eval(self, node->nd_head)); 2577 break; @@ -1882,7 +1882,7 @@ evaluated. This enables even the following code: -```TODO-lang +```ruby a, b = b, a # swap variables in oneline ``` @@ -1892,7 +1892,7 @@ Let's look at `NODE_MASGN` in the left-hand side.

▼ `rb_eval()` − `NODE_MASGN`

-```TODO-lang +```c 2923 case NODE_MASGN: 2924 result = massign(self, node, rb_eval(self, node->nd_value),0); 2925 break; @@ -1912,7 +1912,7 @@ Here is only the evaluation of the right-hand side, the rests are delegated to

▼ `massi` ……

-```TODO-lang +```c 3917 static VALUE 3918 massign(self, node, val, pcall) 3919 VALUE self; @@ -1956,7 +1956,7 @@ final appearance is shown as follows:

▼ `massign()` (simplified)

-```TODO-lang +```c static VALUE massign(self, node, val /* , pcall=0 */) VALUE self; diff --git a/name.md b/name.md index ebddf53..59d536f 100644 --- a/name.md +++ b/name.md @@ -24,7 +24,7 @@ However, data structures other than hash tables can, of course, record one-to-one relations. For example, a list of the following structs will suffice for this purpose. -```TODO-lang +```c struct entry { ID key; VALUE val; @@ -44,7 +44,7 @@ created by Matsumoto, rather: ▼ `st.c` credits -```TODO-lang +```c 1 /* This is a public domain general purpose hash table package written by Peter Moore @ UCB. */ @@ -125,7 +125,7 @@ The following is the data type of `st_table`. ▼ `st_table` -```TODO-lang +```c 9 typedef struct st_table st_table; 16 struct st_table { @@ -140,7 +140,7 @@ The following is the data type of `st_table`. ▼ `struct st_table_entry` -```TODO-lang +```c 16 struct st_table_entry { 17 unsigned int hash; 18 char *key; @@ -167,7 +167,7 @@ So, let us comment on `st_hash_type`. ▼ `struct st_hash_type` -```TODO-lang +```c 11 struct st_hash_type { 12 int (*compare)(); /* comparison function */ 13 int (*hash)(); /* hash function */ @@ -178,7 +178,7 @@ So, let us comment on `st_hash_type`. This is still Chapter 3 so let us examine it attentively. -```TODO-lang +```c int (*compare)() ``` @@ -186,7 +186,7 @@ This part shows, of course, the member `compare` which has a data type of "a pointer to a function that returns an `int`". `hash` is also of the same type. This variable is substituted in the following way: -```TODO-lang +```c int great_function(int n) { @@ -201,7 +201,7 @@ great_function(int n) And it is called like this: -```TODO-lang +```c (*f)(7); } ``` @@ -244,7 +244,7 @@ integer data type keys. 
▼ `st_init_numtable()` -```TODO-lang +```c 182 st_table* 183 st_init_numtable() 184 { @@ -260,7 +260,7 @@ Regarding this `type_numhash`: ▼ `type_numhash` -```TODO-lang +```c 37 static struct st_hash_type type_numhash = { 38 numcmp, 39 numhash, @@ -294,7 +294,7 @@ function that searches the hash table, `st_lookup()`. ▼ `st_lookup()` -```TODO-lang +```c 247 int 248 st_lookup(table, key, value) 249 st_table *table; @@ -324,7 +324,7 @@ look at them in order. ▼ `do_hash()` -```TODO-lang +```c 68 #define do_hash(key,table) (unsigned int)(*(table)->type->hash)((key)) (st.c) @@ -332,7 +332,7 @@ look at them in order. Just in case, let us write down the macro body that is difficult to understand: -```TODO-lang +```c (table)->type->hash ``` @@ -345,7 +345,7 @@ Next, let us examine `FIND_ENTRY()`. ▼ `FIND_ENTRY()` -```TODO-lang +```c 235 #define FIND_ENTRY(table, ptr, hash_val, bin_pos) do {\ 236 bin_pos = hash_val%(table)->num_bins;\ 237 ptr = (table)->bins[bin_pos];\ @@ -386,7 +386,7 @@ end. Also, there is no semicolon added after the `while(0)`. -```TODO-lang +```c FIND_ENTRY(); ``` @@ -402,7 +402,7 @@ in the function name. ▼ `st_add_direct()` -```TODO-lang +```c 308 void 309 st_add_direct(table, key, value) 310 st_table *table; @@ -428,7 +428,7 @@ Since the name is all uppercase, we can anticipate that is a macro. ▼ `ADD_DIRECT()` -```TODO-lang +```c 268 #define ADD_DIRECT(table, key, value, hash_val, bin_pos) \ 269 do { \ 270 st_table_entry *entry; \ @@ -460,7 +460,7 @@ The first `if` is an exception case so I will explain it afterwards. (B) Insert the `entry` into the start of the list. This is the idiom for handling the list. In other words, -```TODO-lang +```c entry->next = list_beg; list_beg = entry; ``` @@ -473,7 +473,7 @@ Now, let me explain the code I left aside. 
▼ `ADD_DIRECT()`-`rehash` -```TODO-lang +```c 271 if (table->num_entries / (table->num_bins) \ > ST_DEFAULT_MAX_DENSITY) { \ 272 rehash(table); \ @@ -493,7 +493,7 @@ The current `ST_DEFAULT_MAX_DENSITY` is ▼ `ST_DEFAULT_MAX_DENSITY` -```TODO-lang +```c 23 #define ST_DEFAULT_MAX_DENSITY 5 (st.c) @@ -509,7 +509,7 @@ then the size will be increased. ▼ `st_insert()` -```TODO-lang +```c 286 int 287 st_insert(table, key, value) 288 register st_table *table; @@ -553,7 +553,9 @@ is rather long, so let's omit the middle. ▼ `rb_intern()` (simplified) -```TODO-lang + + +```yacc 5451 static st_table *sym_tbl; /* char* to ID */ 5452 static st_table *sym_rev_tbl; /* ID to char* */ @@ -604,7 +606,7 @@ simplify it. ▼ `rb_id2name()` (simplified) -```TODO-lang +```c char * rb_id2name(id) ID id; @@ -639,7 +641,7 @@ And it can be obtained like so: `"string".intern`. The implementation of ▼ `rb_str_intern()` -```TODO-lang +```c 2996 static VALUE 2997 rb_str_intern(str) 2998 VALUE str; @@ -671,7 +673,7 @@ The implementation is in `sym_to_s`. ▼ `sym_to_s()` -```TODO-lang +```c 522 static VALUE 523 sym_to_s(sym) 524 VALUE sym; diff --git a/object.md b/object.md index 88e1e70..71d20e4 100644 --- a/object.md +++ b/object.md @@ -44,7 +44,7 @@ Here is the definition of `VALUE`: ▼ `VALUE` -```TODO-lang +```c 71 typedef unsigned long VALUE; (ruby.h) @@ -60,19 +60,19 @@ but some time ago it seems there were quite a few of them. The structs, on the other hand, have several variations, a different struct is used based on the class of the object. -| `struct RObject` | all things for which none of the following - applies | -| `struct RClass` | class object | -| `struct RFloat` | small numbers | -| `struct RString` | string | -| `struct RArray` | array | -| `struct RRegexp` | regular expression | -| `struct RHash` | hash table | -| `struct RFile` | `IO`, `File`, `Socket`, etc... 
| -| `struct RData` | all the classes defined at C level, except the - ones mentioned above | -| `struct RStruct` | Ruby's `Struct` class | -| `struct RBignum` | big integers | +| `struct` | variation | +| ---------------- | ------------------------------------------------------------------- | +| `struct RObject` | all things for which none of the following applies | +| `struct RClass` | class object | +| `struct RFloat` | small numbers | +| `struct RString` | string | +| `struct RArray` | array | +| `struct RRegexp` | regular expression | +| `struct RHash` | hash table | +| `struct RFile` | `IO`, `File`, `Socket`, etc... | +| `struct RData` | all the classes defined at C level, except the ones mentioned above | +| `struct RStruct` | Ruby's `Struct` class | +| `struct RBignum` | big integers | For example, for an string object, `struct RString` is used, so we will have something like the following. @@ -86,7 +86,7 @@ Let's look at the definition of a few object structs. ▼ Examples of object struct -```TODO-lang +```c /* struct for ordinary objects */ 295 struct RObject { 296 struct RBasic basic; @@ -127,7 +127,7 @@ That's why `Rxxxx()` macros have been made for each object struct. For example, for `struct RString` there is `RSTRING()`, for `struct RArray` there is `RARRAY()`, etc... These macros are used like this: -```TODO-lang +```c VALUE str = ....; VALUE arr = ....; @@ -152,7 +152,7 @@ for `struct RBasic`: ▼ `struct RBasic` -```TODO-lang +```c 290 struct RBasic { 291 unsigned long flags; 292 VALUE klass; @@ -165,7 +165,7 @@ for `struct RBasic`: (for instance `struct RObject`). The type flags are named `T_xxxx`, and can be obtained from a `VALUE` using the macro `TYPE()`. Here is an example: -```TODO-lang +```c VALUE str; str = rb_str_new(); /* creates a Ruby string (its struct is RString) */ TYPE(str); /* the return value is T_STRING */ @@ -273,7 +273,7 @@ to a `Fixnum`, and confirm that `Fixnum` are directly embedded in `VALUE`. 
▼ `INT2FIX` -```TODO-lang +```c 123 #define INT2FIX(i) ((VALUE)(((long)(i))<<1 | FIXNUM_FLAG)) 122 #define FIXNUM_FLAG 0x01 @@ -308,7 +308,7 @@ In the first place, there's a type named `ID` used inside `ruby`. Here it is. ▼ `ID` -```TODO-lang +```c 72 typedef unsigned long ID; (ruby.h) @@ -342,7 +342,7 @@ why `Symbol`, like `Fixnum`, was made embedded in `VALUE`. Let's look at the ▼ `ID2SYM` -```TODO-lang +```c 158 #define SYMBOL_FLAG 0x0e 160 #define ID2SYM(x) ((VALUE)(((long)(x))<<8|SYMBOL_FLAG)) @@ -359,7 +359,7 @@ Finally, let's see the reverse conversion of `ID2SYM()`, `SYM2ID()`. ▼ `SYM2ID()` -```TODO-lang +```c 161 #define SYM2ID(x) RSHIFT((long)x,8) (ruby.h) @@ -376,7 +376,7 @@ values at the C level are defined like this: ▼ `true false nil` -```TODO-lang +```c 164 #define Qfalse 0 /* Ruby's false */ 165 #define Qtrue 2 /* Ruby's true */ 166 #define Qnil 4 /* Ruby's nil */ @@ -397,7 +397,7 @@ For `Qnil`, there is a macro dedicated to check if a `VALUE` is `Qnil` or not, ▼ `NIL_P()` -```TODO-lang +```c 170 #define NIL_P(v) ((VALUE)(v) == Qnil) (ruby.h) @@ -415,7 +415,7 @@ That's why there's the `RTEST()` macro to do Ruby-style test in C. ▼ `RTEST()` -```TODO-lang +```c 169 #define RTEST(v) (((VALUE)(v) & ~Qnil) != 0) (ruby.h) @@ -436,7 +436,7 @@ not have the fun answer I was expecting... ▼ `Qundef` -```TODO-lang +```c 167 #define Qundef 6 /* undefined value for placeholder */ (ruby.h) @@ -465,7 +465,7 @@ differentiated by the `T_MODULE` struct flag. ▼ `struct RClass` -```TODO-lang +```c 300 struct RClass { 301 struct RBasic basic; 302 struct st_table *iv_tbl; @@ -518,7 +518,7 @@ The sequential search process in `m_tbl` is done by `search_method()`. ▼ `search_method()` -```TODO-lang +```c 256 static NODE* 257 search_method(klass, id, origin) 258 VALUE klass, *origin; @@ -543,7 +543,7 @@ This function searches the method named `id` in the class object `klass`. 
`RCLASS(value)` is the macro doing: -```TODO-lang +```c ((struct RClass*)(value)) ``` @@ -572,7 +572,7 @@ but is it really so? Let's look at the function ▼ `rb_ivar_set()` -```TODO-lang +```c /* assign val to the id instance variable of obj */ 984 VALUE 985 rb_ivar_set(obj, id, val) @@ -610,7 +610,7 @@ Therefore, we should wholly ignore them at first read. After removing the error handling, only the `switch` remains, but -```TODO-lang +```c switch (TYPE(obj)) { case T_aaaa: case T_bbbb: @@ -631,7 +631,7 @@ the basis that their second member is `iv_tbl`. Let's confirm it in practice. ▼ Structs whose second member is `iv_tbl` -```TODO-lang +```c /* TYPE(val) == T_OBJECT */ 295 struct RObject { 296 struct RBasic basic; @@ -655,7 +655,7 @@ It records the correspondences between the instance variable names and their val In `rb_ivar_set()`, let's look again the code for the structs having `iv_tbl`. -```TODO-lang +```c if (!ROBJECT(obj)->iv_tbl) ROBJECT(obj)->iv_tbl = st_init_numtable(); st_insert(ROBJECT(obj)->iv_tbl, id, val); @@ -678,7 +678,7 @@ its instance variable table is for the class object itself. In Ruby programs, it corresponds to something like the following: -```TODO-lang +```ruby class C @ivar = "content" end @@ -691,7 +691,7 @@ an object whose struct is not one of `T_OBJECT T_MODULE T_CLASS`? ▼ `rb_ivar_set()` in the case there is no `iv_tbl` -```TODO-lang +```c 1000 default: 1001 generic_ivar_set(obj, id, val); 1002 break; @@ -719,7 +719,7 @@ Let's see this in practice. ▼ `generic_ivar_set()` -```TODO-lang +```c 801 static st_table *generic_iv_tbl; 830 static void @@ -840,7 +840,7 @@ how to get them. ▼ `rb_ivar_get()` -```TODO-lang +```c 960 VALUE 961 rb_ivar_get(obj, id) 962 VALUE obj; @@ -911,7 +911,7 @@ its subclasses. ▼ `struct RString` -```TODO-lang +```c 314 struct RString { 315 struct RBasic basic; 316 long len; @@ -956,7 +956,7 @@ characteristics. Ruby's strings can be modified (are mutable). 
By mutable I mean after the following code: -```TODO-lang +```ruby s = "str" # create a string and assign it to s s.concat("ing") # append "ing" to this string object p(s) # show "string" @@ -980,7 +980,7 @@ additional memory. So what is this other `aux.shared`? It's to speed up the creation of literal strings. Have a look at the following Ruby program. -```TODO-lang +```ruby while true do # repeat indefinitely a = "str" # create a string with "str" as content and assign it to a a.concat("ing") # append "ing" to the object pointed by a @@ -1014,7 +1014,7 @@ modifying strings created as litterals, `aux.shared` has to be separated. Before ending this section, I'll write some examples of dealing with `RString`. I'd like you to regard `str` as a `VALUE` that points to `RString` when reading this. -```TODO-lang +```c RSTRING(str)->len; /* length */ RSTRING(str)->ptr[0]; /* first character */ str = rb_str_new("content", 7); /* create a string with "content" as its content @@ -1031,7 +1031,7 @@ rb_str_cat2(str, "end"); /* Concatenate a C string to a Ruby string */ ▼ `struct RArray` -```TODO-lang +```c 324 struct RArray { 325 struct RBasic basic; 326 long len; @@ -1063,7 +1063,7 @@ With `RARRAY(arr)->ptr` and `RARRAY(arr)->len`, you can refer to the members, and it is allowed, but you must not assign to them, etc. We'll only look at simple examples: -```TODO-lang +```c /* manage an array from C */ VALUE ary; ary = rb_ary_new(); /* create an empty array */ @@ -1084,7 +1084,7 @@ It's the struct for the instances of the regular expression class `Regexp`. ▼ `struct RRegexp` -```TODO-lang +```c 334 struct RRegexp { 335 struct RBasic basic; 336 struct re_pattern_buffer *ptr; @@ -1110,7 +1110,7 @@ which is Ruby's hash table. ▼ `struct RHash` -```TODO-lang +```c 341 struct RHash { 342 struct RBasic basic; 343 struct st_table *tbl; @@ -1134,7 +1134,7 @@ its subclasses. 
▼ `struct RFile` -```TODO-lang +```c 348 struct RFile { 349 struct RBasic basic; 350 struct OpenFile *fptr; @@ -1145,7 +1145,7 @@ its subclasses. ▼ `OpenFile` -```TODO-lang +```c 19 typedef struct OpenFile { 20 FILE *f; /* stdio ptr for read/write */ 21 FILE *f2; /* additional ptr for rw pipes */ @@ -1176,7 +1176,7 @@ for managing a pointer to a user defined struct" has been created on ▼ `struct RData` -```TODO-lang +```c 353 struct RData { 354 struct RBasic basic; 355 void (*dmark) _((void*)); diff --git a/parser.md b/parser.md index 47d6af3..7068a84 100644 --- a/parser.md +++ b/parser.md @@ -712,7 +712,7 @@ is to deal with the methods without parentheses. For example, it is to distinguish the next two from each other: -```TODO-lang +```ruby p Net::HTTP # p(Net::HTTP) p Net ::HTTP # p(Net(::HTTP)) ``` @@ -791,7 +791,7 @@ surrounding the multiple arguments of a `return` with parentheses as in the following code should be impossible. -```TODO-lang +```ruby return(1, 2, 3) # interpreted as return (1,2,3) and results in parse error ``` @@ -840,7 +840,7 @@ Because `primary` is also `arg`, we can also do something like this. -```TODO-lang +```ruby p(if true then 'ok' end) # shows "ok" ``` @@ -1096,7 +1096,7 @@ First, I'll start with `nextc()` that seems the most orthodox.

▼ `nextc()`

-```TODO-lang +```yacc 2468 static inline int 2469 nextc() 2470 { @@ -1193,7 +1193,7 @@ I searched for the place where `lex_gets` is set, and this is what I found:

▼ set `lex_gets`

-```TODO-lang +```yacc 2430 NODE* 2431 rb_compile_string(f, s, line) 2432 const char *f; @@ -1228,7 +1228,7 @@ On the other hand, `lex_get_str()` is defined as follows:

▼ `lex_get_str()`

-```TODO-lang +```yacc 2398 static int lex_gets_ptr; 2400 static VALUE @@ -1279,7 +1279,7 @@ we can understand the rest easily.

▼ `pushback()`

-```TODO-lang +```yacc 2501 static void 2502 pushback(c) 2503 int c; @@ -1300,7 +1300,7 @@ we can understand the rest easily.

▼ `peek()`

-```TODO-lang +```yacc 2509 #define peek(c) (lex_p != lex_pend && (c) == *lex_p) (parse.y) @@ -1329,7 +1329,7 @@ Now, we'll start with the data structures.

▼ The Token Buffer

-```TODO-lang +```yacc 2271 static char *tokenbuf = NULL; 2272 static int tokidx, toksiz = 0; @@ -1356,7 +1356,7 @@ read `newtok()`, which starts a new token.

▼ `newtok()`

-```TODO-lang +```yacc 2516 static char* 2517 newtok() 2518 { @@ -1394,7 +1394,7 @@ Next, let's look at the `tokadd()` to add a character to token buffer.

▼ `tokadd()`

-```TODO-lang +```yacc 2531 static void 2532 tokadd(c) 2533 char c; @@ -1421,7 +1421,7 @@ The remaining interfaces are summarized below.

▼ `tokfix() tok() toklen() toklast()`

-```TODO-lang +```yacc 2511 #define tokfix() (tokenbuf[tokidx]='\0') 2512 #define tok() tokenbuf 2513 #define toklen() tokidx @@ -1446,7 +1446,7 @@ First, I'll show the overall structure, with some parts left out.

▼ `yylex` outline

-```TODO-lang +```yacc 3106 static int 3107 yylex() 3108 { @@ -1534,7 +1534,7 @@ Let's start with what is simple first.

▼ `yylex` - `'!'`

-```TODO-lang +```yacc 3205 case '!': 3206 lex_state = EXPR_BEG; 3207 if ((c = nextc()) == '=') { @@ -1550,11 +1550,11 @@ Let's start with what is simple first. ``` -I wroute out the meaning of the code, +I wrote out the meaning of the code, so I'd like you to read them by comparing each other. -```TODO-lang +``` case '!': move to EXPR_BEG if (the next character is '=' then) { @@ -1595,7 +1595,7 @@ Next, we'll try to look at `'>'` as an example of using `yylval` (the value of a

▼ `yylex`−`'>'`

-```TODO-lang +```yacc 3296 case '>': 3297 switch (lex_state) { 3298 case EXPR_FNAME: case EXPR_DOT: @@ -1657,7 +1657,7 @@ The code of `':'` shown below is an example where a space changes the behavior.

▼ `yylex`−`':'`

-```TODO-lang +```yacc 3761 case ':': 3762 c = nextc(); 3763 if (c == ':') { @@ -1705,7 +1705,7 @@ It is the scanning pattern of identifiers. First, the outline of `yylex` was as follows: -```TODO-lang +```yacc yylex(...) { switch (c = nextc()) { @@ -1727,7 +1727,7 @@ This is relatively long, so I'll show it with comments.

▼ `yylex` -- identifiers

-```TODO-lang +```yacc 4081 case '@': /* an instance variable or a class variable */ 4082 c = nextc(); 4083 newtok(); @@ -1791,7 +1791,7 @@ at the place where adding `!` or `?`. This part is to interpret in the next way. -```TODO-lang +```ruby obj.m=1 # obj.m = 1 (not obj.m=) obj.m!=1 # obj.m != 1 (not obj.m!) ``` @@ -1830,7 +1830,7 @@ Usually, only the data would be separated to a list or a hash in order to keep the code short. -```TODO-lang +```c /* convert the code to data */ struct entry {char *name; int symbol;}; struct entry *table[] = { @@ -1888,7 +1888,7 @@ The definition of `struct kwtable` is as follows:

▼ `kwtable`

-```TODO-lang +``` 1 struct kwtable {char *name; int id[2]; enum lex_state state;}; (keywords) @@ -1906,7 +1906,7 @@ This is where the lookup actually takes place.

▼ `yylex()` -- identifier -- call `rb_reserved_word()`

-```TODO-lang +```yacc 4173 struct kwtable *kw; 4174 4175 /* See if it is a reserved word. */ @@ -1927,7 +1927,7 @@ The double quote (`"`) part of `yylex()` is this.

▼ `yylex` − `'"'`

-```TODO-lang +```yacc 3318 case '"': 3319 lex_strterm = NEW_STRTERM(str_dquote, '"', 0); 3320 return tSTRING_BEG; @@ -1943,7 +1943,7 @@ Then, this time, when taking a look at the rule,

▼ rules for strings

-```TODO-lang +```yacc string1 : tSTRING_BEG string_contents tSTRING_END string_contents : @@ -1968,7 +1968,7 @@ These rules are the part introduced to deal with embedded expressions inside of `tSTRING_DVAR` represents "`#` that in front of a variable". For example, -```TODO-lang +```ruby ".....#$gvar...." ``` @@ -1995,7 +1995,7 @@ the next `yylex()`. What plays an important role there is ... -```TODO-lang +```yacc case '"': lex_strterm = NEW_STRTERM(str_dquote, '"', 0); return tSTRING_BEG; @@ -2007,7 +2007,7 @@ What plays an important role there is ...

▼ the beginning of `yylex()`

-```TODO-lang +```yacc 3106 static int 3107 yylex() 3108 { @@ -2040,7 +2040,7 @@ This is done in the following part:

▼ `string_content`

-```TODO-lang +```yacc 1916 string_content : .... 1917 | tSTRING_DBEG term_push 1918 { @@ -2099,7 +2099,7 @@ First, let's look at its type.

▼ `lex_strterm`

-```TODO-lang +```yacc 72 static NODE *lex_strterm; (parse.y) @@ -2116,7 +2116,7 @@ you should remember only these two points.

▼ `NEW_STRTERM()`

-```TODO-lang +```yacc 2865 #define NEW_STRTERM(func, term, paren) \ 2866 rb_node_newnode(NODE_STRTERM, (func), (term), (paren)) @@ -2133,7 +2133,7 @@ and if it is a `'` string, it is `'`. `paren` is used to store the corresponding parenthesis when it is a `%` string. For example, -```TODO-lang +```ruby %Q(..........) ``` @@ -2148,7 +2148,7 @@ The available types are decided as follows:

▼ `func`

-```TODO-lang +```yacc 2775 #define STR_FUNC_ESCAPE 0x01 /* backslash notations such as \n are in effect */ 2776 #define STR_FUNC_EXPAND 0x02 /* embedded expressions are in effect */ 2777 #define STR_FUNC_REGEXP 0x04 /* it is a regular expression */ @@ -2170,13 +2170,14 @@ The available types are decided as follows: Each meaning of `enum string_type` is as follows: - -| `str_squote` | `'` string / `%q` | -| `str_dquote` | `"` string / `%Q` | +| Type | Meaning | +| ------------ | ---------------------------------------------- | +| `str_squote` | `'` string / `%q` | +| `str_dquote` | `"` string / `%Q` | | `str_xquote` | command string (not be explained in this book) | -| `str_regexp` | regular expression | -| `str_sword` | `%w` | -| `str_dword` | `%W` | +| `str_regexp` | regular expression | +| `str_sword` | `%w` | +| `str_dword` | `%W` | @@ -2189,7 +2190,7 @@ in other words, the `if` at the beginning.

▼ `yylex`− string

-```TODO-lang +```yacc 3114 if (lex_strterm) { 3115 int token; 3116 if (nd_type(lex_strterm) == NODE_HEREDOC) { @@ -2242,7 +2243,7 @@ First, I'll show the code of `yylex()` to scan the starting symbol of a here doc

▼ `yylex`−`'<'`

-```TODO-lang +```yacc 3260 case '<': 3261 c = nextc(); 3262 if (c == '<' && @@ -2267,7 +2268,7 @@ Therefore, here is `heredoc_identifier()`.

▼ `heredoc_identifier()`

-```TODO-lang +```yacc 2926 static int 2927 heredoc_identifier() 2928 { @@ -2292,8 +2293,8 @@ Until now, the input buffer probably has become as depicted as Figure 10. Let's recall that the input buffer reads a line at a time.
- figure 10: scanning ` -
figure 10: scanning `"printf\(< + scanning `"printf(<<EOS,n)"` +
scanning "printf(<
@@ -2309,7 +2310,7 @@ read line) and `len` (the length that has already read) are saved. Then, the dynamic call graph before and after `heredoc_identifier` is simply shown below: -```TODO-lang +``` yyparse yylex(case '<') heredoc_identifier(lex_strterm = ....) @@ -2326,7 +2327,7 @@ Notice that `lex_strterm` remains unchanged after it was set at `heredoc_identif

▼ `here_document()`(simplified)

-```TODO-lang +```yacc here_document(NODE *here) { VALUE line; /* the line currently being scanned */ @@ -2371,7 +2372,7 @@ And finally, leaving the `do` ~ `while` loop, it is `heredoc_restore()`.

▼ `heredoc_restore()`

-```TODO-lang +```yacc 2990 static void 2991 heredoc_restore(here) 2992 NODE *here; diff --git a/spec.md b/spec.md index 89fd1c2..2e51469 100644 --- a/spec.md +++ b/spec.md @@ -261,7 +261,7 @@ by a `b` followed by a `c`. It matches "abc" or "fffffffabc" or One can designate more special patterns. -```TODO-lang +```ruby /^From:/ ``` @@ -299,7 +299,7 @@ Also as with strings, regular expressions also have a syntax for changing delimiters. In this case it is `%r`. To understand this, looking at some examples are enough to understand. -```TODO-lang +```ruby %r(regexp) %r[/\*.*?\*/] # matches a C comment %r("(?:[^"\\]+|\\.)*") # matches a string in C @@ -310,7 +310,7 @@ examples are enough to understand. A comma-separated list enclosed in brackets `[]` is an array literal. -```TODO-lang +```ruby [1, 2, 3] ['This', 'is', 'an', 'array', 'of', 'string'] @@ -331,7 +331,7 @@ together can also be written straightforwardly. Note that this is "an expression which generates an array object" as with the other literals. -```TODO-lang +```ruby i = 0 while i < 5 p([1,2,3].id) # Each time another object id is shown. @@ -345,7 +345,7 @@ When writing scripts one uses arrays of strings a lot, hence there is a special notation only for arrays of strings. That is `%w`. With an example it's immediately obvious. -```TODO-lang +```ruby %w( alpha beta gamma delta ) # ['alpha','beta','gamma','delta'] %w( 月 火 水 木 金 土 日 ) %w( Jan Feb Mar Apr May Jun @@ -355,7 +355,7 @@ That is `%w`. With an example it's immediately obvious. There's also `%W` where expressions can be embedded. It's a feature implemented fairly recently. -```TODO-lang +```ruby n = 5 %w( list0 list#{n} ) # ['list0', 'list#{n}'] %W( list0 list#{n} ) # ['list0', 'list5'] @@ -369,7 +369,7 @@ Hash tables are data structure which store a one-to-one relation between arbitrary objects. By writing as follows, they will be expressions to generate tables. 
-```TODO-lang +```ruby { 'key' => 'value', 'key2' => 'value2' } { 3 => 0, 'string' => 5, ['array'] => 9 } { Object.new() => 3, Object.new() => 'string' } @@ -389,14 +389,14 @@ Furthermore, when used as an argument of a method call, the `{...}` can be omitted under a certain condition. -```TODO-lang +```ruby some_method(arg, key => value, key2 => value2) # some_method(arg, {key => value, key2 => value2}) # same as above ``` With this we can imitate named (keyword) arguments. -```TODO-lang +```ruby button.set_geometry('x' => 80, 'y' => '240') ``` @@ -409,7 +409,7 @@ it's not the case for this because this is just a "imitation". Range literals are oddballs which don't appear in most other languages. Here are some expressions which generate Range objects. -```TODO-lang +```ruby 0..5 # from 0 to 5 containing 5 0...5 # from 0 to 5 not containing 5 1+2 .. 9+0 # from 3 to 9 containing 9 @@ -430,7 +430,7 @@ it would be a runtime error. By the way, because the precedence of `..` and `...` is quite low, sometimes it is interpreted in a surprising way. -```TODO-lang +```ruby 1..5.to_a() # 1..(5.to_a()) ``` @@ -443,7 +443,7 @@ In Part 1, we talked about symbols at length. It's something corresponds one-to-one to an arbitrary string. In Ruby symbols are expressed with a `:` in front. -```TODO-lang +```ruby :identifier :abcde ``` @@ -452,7 +452,7 @@ These examples are pretty normal. Actually, besides them, all variable names and method names can become symbols with a `:` in front. Like this: -```TODO-lang +```ruby :$gvar :@ivar :@@cvar @@ -463,7 +463,7 @@ Moreover, though we haven't talked this yet, `[]` or `attr=` can be used as method names, so naturally they can also be used as symbols. -```TODO-lang +```ruby :[] :attr= ``` @@ -477,7 +477,7 @@ This is the least interesting. One possible thing I can introduce here is that, when writing a million, -```TODO-lang +```ruby 1_000_000 ``` @@ -493,7 +493,7 @@ Let's talk about the definition and calling of methods. 
### Definition and Calls -```TODO-lang +```ruby def some_method( arg ) .... end @@ -510,7 +510,7 @@ they become function style methods, inside a class they become methods of this class. To call a method which was defined in a class, one usually has to create an instance with `new` as shown below. -```TODO-lang +```ruby C.new().some_method(0) ``` @@ -520,7 +520,7 @@ The return value of a method is, if a `return` is executed in the middle, its value. Otherwise, it's the value of the statement which was executed last. -```TODO-lang +```ruby def one() # 1 is returned return 1 999 @@ -551,7 +551,7 @@ Optional arguments can also be defined. If the number of arguments doesn't suffice, the parameters are automatically assigned to default values. -```TODO-lang +```ruby def some_method( arg = 9 ) # default value is 9 p arg end @@ -565,7 +565,7 @@ But in that case they must all come at the end of the argument list. If elements in the middle of the list were optional, how the correspondences of the arguments would be very unclear. -```TODO-lang +```ruby def right_decl( arg1, arg2, darg1 = nil, darg2 = nil ) .... end @@ -580,7 +580,7 @@ end In fact, the parentheses of a method call can be omitted. -```TODO-lang +```ruby puts 'Hello, World!' # puts("Hello, World") obj = Object.new # obj = Object.new() ``` @@ -590,14 +590,14 @@ but there is no such thing in Ruby. If you'd like to, you can omit more parentheses. -```TODO-lang +```ruby puts(File.basename fname) # puts(File.basename(fname)) same as the above ``` If we like we can even leave out more -```TODO-lang +```ruby puts File.basename fname # puts(File.basename(fname)) same as the above ``` @@ -607,7 +607,7 @@ It's likely that this will not pass anymore in Ruby 2.0. Actually even the parentheses of the parameters definition can also be omitted. 
-```TODO-lang +```ruby def some_method param1, param2, param3 end @@ -626,7 +626,7 @@ there's nothing odd if we can do something converse: extracting a list (an array) as arguments, as the following example. -```TODO-lang +```ruby def delegate(a, b, c) p(a, b, c) end @@ -640,7 +640,7 @@ Let's call this device a `*`argument now. Here we used a local variable for demonstration, but of course there is no limitation. We can also directly put a literal or a method call instead. -```TODO-lang +```ruby m(*[1,2,3]) # We could have written the expanded form in the first place... m(*mcall()) ``` @@ -653,7 +653,7 @@ single way. In the definition on the other hand we can handle the arguments in bulk when we put a `*` in front of the parameter variable. -```TODO-lang +```ruby def some_method( *args ) p args end @@ -666,7 +666,7 @@ some_method(0, 1) # prints [0,1] The surplus arguments are gathered in an array. Only one `*`parameter can be declared. It must also come after the default arguments. -```TODO-lang +```ruby def some_method0( arg, *rest ) end def some_method1( arg, darg = nil, *rest ) @@ -677,7 +677,7 @@ If we combine list expansion and bulk reception together, the arguments of one method can be passed as a whole to another method. This might be the most practical use of the `*`parameter. -```TODO-lang +```ruby # a method which passes its arguments to other_method def delegate(*args) other_method(*args) @@ -699,7 +699,7 @@ In Ruby there is a ton of it, and they are really attractive for a person who has a fetish for parsers. For instance the examples below are all method calls. -```TODO-lang +```ruby 1 + 2 # 1.+(2) a == b # a.==(b) ~/regexp/ # /regexp/.~ @@ -713,7 +713,7 @@ It's hard to believe until you get used to it, but `attr=`, `[]=`, `\`` are (indeed) all method names. They can appear as names in a method definition and can also be used as symbols. -```TODO-lang +```ruby class C def []( index ) end @@ -734,7 +734,7 @@ Let's see some more details. 
#### Symbol Appendices -```TODO-lang +```ruby obj.name? obj.name! ``` @@ -748,7 +748,7 @@ of characters can be used in procedure names. #### Binary Operators -```TODO-lang +```ruby 1 + 2 # 1.+(2) ``` @@ -758,7 +758,7 @@ As listed below there are many of them. There are the general operators `+` and `-`, also the equivalence operator `==` and the spaceship operator `<=>' as in Perl, all sorts. They are listed in order of their precedence. -```TODO-lang +```ruby ** * / % + - @@ -774,7 +774,7 @@ are built-in operators. Remember how it is in C. #### Unary Operators -```TODO-lang +```ruby +2 -1.0 ~/regexp/ @@ -795,7 +795,7 @@ part of the literal. This is a kind of optimizations.)) #### Attribute Assignment -```TODO-lang +```ruby obj.attr = val # obj.attr=(val) ``` @@ -803,7 +803,7 @@ This is an attribute assignment fashion. The above will be translated into the method call `attr=`. When using this together with method calls whose parentheses are omitted, we can write code which looks like attribute access. -```TODO-lang +```ruby class C def i() @i end # We can write the definition in one line def i=(n) @i = n end @@ -822,14 +822,14 @@ which can take another argument in the attribute assignment fashion. #### Index Notation -```TODO-lang +```ruby obj[i] # obj.[](i) ``` The above will be translated into a method call for `[]`. Array and hash access are also implemented with this device. -```TODO-lang +```ruby obj[i] = val # obj.[]=(i, val) ``` @@ -845,7 +845,7 @@ Here a mechanism to call a method of the superclass when overwriting a method is required. In Ruby, that's `super`. -```TODO-lang +```ruby class A def test puts 'in A' @@ -866,7 +866,7 @@ When using `super`, be careful about the difference between `super` with no arguments and `super` whose arguments are omitted. The `super` whose arguments are omitted passes all the given parameter variables. -```TODO-lang +```ruby class A def test( *args ) p args @@ -912,7 +912,7 @@ Be careful. 
Usually we control visibility as shown below. -```TODO-lang +```ruby class C public def a1() end # becomes public @@ -949,7 +949,7 @@ then we call this a module function. It is not apparent why this should be useful. But let's look at the next example which is happily used. -```TODO-lang +```ruby Math.sin(5) # If used for a few times this is more convenient include Math @@ -971,14 +971,14 @@ are called exterior iterators, Ruby's iterators are interior iterators. Regarding this, it's difficult to understand from the definition so let's explain it with a concrete example. -```TODO-lang +```ruby arr = [0,2,4,6.8] ``` This array is given and we want to access the elements in order. In C style we would write the following. -```TODO-lang +```ruby i = 0 while i < arr.length print arr[i] @@ -988,7 +988,7 @@ end Using an iterator we can write: -```TODO-lang +```ruby arr.each do |item| print item end @@ -1010,7 +1010,7 @@ to the cut out piece of code. We can also think the other way round. The other parts except `print item` are being cut out and enclosed into the `each` method. -```TODO-lang +```ruby i = 0 while i < arr.length print arr[i] @@ -1031,7 +1031,7 @@ and higher order functions in C differ. Firstly, Ruby iterators can only take one block. For instance we can't do the following. -```TODO-lang +```ruby # Mistake. Several blocks cannot be passed. array_of_array.each do |i| .... @@ -1042,7 +1042,7 @@ end Secondly, Ruby's blocks can share local variables with the code outside. -```TODO-lang +```ruby lvar = 'ok' [0,1,2].each do |i| p lvar # Can acces local variable outside the block. @@ -1061,7 +1061,7 @@ visible. Local variables which are assigned inside a block stay local to that block, it means they become block local variables. Let's check it out. -```TODO-lang +```ruby [0].each do i = 0 p i # 0 @@ -1076,7 +1076,7 @@ This makes `i` block local. It is said block local, so it should not be able to access from the outside. Let's test it. 
-```TODO-lang +``` % ruby -e ' [0].each do i = 0 @@ -1093,7 +1093,7 @@ surely an error occured. Without a doubt it stayed local to the block. Iterators can also be nested repeatedly. Each time the new block creates another scope. -```TODO-lang +```ruby lvar = 0 [1].each do var1 = 1 @@ -1115,7 +1115,7 @@ nowadays' major languages Ruby's block local variables don't do shadowing. Shadowing means for instance in C that in the code below the two declared variables `i` are different. -```TODO-lang +```c { int i = 3; printf("%d\n", i); /* 3 */ @@ -1133,7 +1133,7 @@ That's why it's called shadowing. But what happens with block local variables of Ruby where there's no shadowing. Let's look at this example. -```TODO-lang +```ruby i = 0 p i # 0 [0].each do @@ -1158,7 +1158,7 @@ First, there are two ways to write an iterator. One is the `do` ~ `end` as used above, the other one is the enclosing in braces. The two expressions below have exactly the same meaning. -```TODO-lang +```ruby arr.each do |i| puts i end @@ -1171,7 +1171,7 @@ arr.each {|i| # The author likes a four space indentation for But grammatically the precedence is different. The braces bind much stronger than `do`~`end`. -```TODO-lang +```ruby m m do .... end # m(m) do....end m m { .... } # m(m() {....}) ``` @@ -1179,7 +1179,7 @@ m m { .... } # m(m() {....}) And iterators are definitely methods, so there are also iterators that take arguments. -```TODO-lang +```ruby re = /^\d/ # regular expression to match a digit at the beginning of the line $stdin.grep(re) do |line| # look repeatedly for this regular expression .... @@ -1192,7 +1192,7 @@ Of course users can write their own iterators. Methods which have a `yield` in their definition text are iterators. Let's try to write an iterator with the same effect as `Array#each`: -```TODO-lang +```ruby # adding the definition to the Array class class Array def my_each @@ -1220,7 +1220,7 @@ when the execution of the block finishes it returns back to the same location. 
Think about it like a characteristic function call. When the present method does not have a block a runtime error will occur. -```TODO-lang +``` % ruby -e '[0,1,2].each' -e:1:in `each': no block given (LocalJumpError) from -e:1 @@ -1232,7 +1232,7 @@ I said, that iterators are like cut out code which is passed as an argument. But we can even more directly make code to an object and carry it around. -```TODO-lang +```ruby twice = Proc.new {|n| n * 2 } p twice.call(9) # 18 will be printed ``` @@ -1248,7 +1248,7 @@ which turns an iterator block into an object. Besides there is a function style method `lambda` provided which has the same effect as `Proc.new`. Choose whatever suits you. -```TODO-lang +```ruby twice = lambda {|n| n * 2 } ``` @@ -1262,7 +1262,7 @@ That's why one can be transformed into the other. First, to turn an iterator block into a `Proc` object one has to put an `&` in front of the parameter name. -```TODO-lang +```ruby def print_block( &block ) p block end @@ -1278,21 +1278,21 @@ iterator (there's no block attached) `nil` is assigned. And in the other direction, if we want to pass a `Proc` to an iterator we also use `&`. -```TODO-lang +```ruby block = Proc.new {|i| p i } [0,1,2].each(&block) ``` This code means exactly the same as the code below. -```TODO-lang +```ruby [0,1,2].each {|i| p i } ``` If we combine these two, we can delegate an iterator block to a method somewhere else. -```TODO-lang +```ruby def each_item( &block ) [0,1,2].each(&block) end @@ -1319,7 +1319,7 @@ We probably do not need to explain the `if` expression. If the conditional expression is true, the body is executed. As explained in Part 1, every object except `nil` and `false` is true in Ruby. -```TODO-lang +```ruby if cond0 then .... elsif cond1 then @@ -1336,7 +1336,7 @@ But there are some finer requirements concerning `then`. For this kind of thing, looking at some examples is the best way to understand. Here only thing I'd say is that the below codes are valid. 
-```TODO-lang +```ruby # 1 # 4 if cond then ..... end if cond then .... end @@ -1353,7 +1353,7 @@ expression. It is the value of the body where a condition expression is met. For example, if the condition of the first `if` is true, the value would be the one of its body. -```TODO-lang +```ruby p(if true then 1 else 2 end) #=> 1 p(if false then 1 else 2 end) #=> 2 p(if false then 1 elsif true then 2 else 3 end) #=> 2 @@ -1362,7 +1362,7 @@ p(if false then 1 elsif true then 2 else 3 end) #=> 2 If there's no match, or the matched clause is empty, the value would be `nil`. -```TODO-lang +```ruby p(if false then 1 end) #=> nil p(if true then end) #=> nil ``` @@ -1372,7 +1372,7 @@ p(if true then end) #=> nil An `if` with a negated condition is an `unless`. The following two expressions have the same meaning. -```TODO-lang +```ruby unless cond then if not (cond) then .... .... end end @@ -1392,7 +1392,7 @@ the value would be `nil`. The most likely utilization of the `and` is probably a boolean operation. For instance in the conditional expression of an `if`. -```TODO-lang +```ruby if cond1 and cond2 puts 'ok' end @@ -1402,7 +1402,7 @@ But as in Perl, `sh` or Lisp, it can also be used as a conditional branch expression. The two following expressions have the same meaning. -```TODO-lang +```ruby if invalid?(key) invalid?(key) and return nil return nil end @@ -1410,7 +1410,7 @@ invalid?(key) and return nil return nil `&&` and `and` have the same meaning. Different is the binding order. -```TODO-lang +```ruby method arg0 && arg1 # method(arg0 && arg1) method arg0 and arg1 # method(arg0) and arg1 ``` @@ -1426,7 +1426,7 @@ the right hand side will also be evaluated. On the other hand `or` is the opposite of `and`. If the evaluation of the left hand side is false, the right hand side will also be evaluated. -```TODO-lang +```ruby valid?(key) or return nil ``` @@ -1437,14 +1437,14 @@ different. There is a conditional operator similar to C: -```TODO-lang +```ruby cond ? 
iftrue : iffalse ``` The space between the symbols is important. If they bump together the following weirdness happens. -```TODO-lang +```ruby cond?iftrue:iffalse # cond?(iftrue(:iffalse)) ``` @@ -1455,7 +1455,7 @@ Either the value of the true side or the value of the false side. Here's a `while` expression. -```TODO-lang +```ruby while cond do .... end @@ -1464,7 +1464,7 @@ end This is the simplest loop syntax. As long as `cond` is true the body is executed. The `do` can be omitted. -```TODO-lang +```ruby until io_ready?(id) do sleep 0.5 end @@ -1479,7 +1479,7 @@ Naturally there is also jump syntaxes to exit a loop. but `continue` is `next`. Perhaps `next` has come from Perl. -```TODO-lang +```ruby i = 0 while true if i > 10 @@ -1494,7 +1494,7 @@ end And there is another Perlism: the `redo`. -```TODO-lang +```ruby while cond # (A) .... @@ -1515,7 +1515,7 @@ necessary after all because I've lived happily despite of it. A special form of the `if` expression. It performs branching on a series of conditions. The following left and right expressions are identical in meaning. -```TODO-lang +```ruby case value when cond1 then if cond1 === value .... .... @@ -1549,7 +1549,7 @@ same. In Ruby exceptions come in the form of the function style method `raise`. `raise` is not a reserved word. -```TODO-lang +```ruby raise ArgumentError, "wrong number of argument" ``` @@ -1560,7 +1560,7 @@ an instance of `ArgumentError` is created and "thrown". Exception object would ditch the part after the `raise` and start to return upwards the method call stack. -```TODO-lang +```ruby def raise_exception raise ArgumentError, "wrong number of argument" # the code after the exception will not be executed @@ -1574,7 +1574,7 @@ finally it will reach the top level. When there's no place to return any more, `ruby` gives out a message and ends with a non-zero exit code. 
-```TODO-lang +``` % ruby raise.rb raise.rb:2:in `raise_exception': wrong number of argument (ArgumentError) from raise.rb:7 @@ -1585,7 +1585,7 @@ should be a way to set handlers. In Ruby, `begin`~`rescue`~`end` is used for this. It resembles the `try`~`catch` in C++ and Java. -```TODO-lang +```ruby def raise_exception raise ArgumentError, "wrong number of argument" end @@ -1605,7 +1605,7 @@ where `ArgumentError` is targeted, so it matches this `rescue`. By `=>err` the exception object will be assigned to the local variable `err`, after that the `rescue` part is executed. -```TODO-lang +``` % ruby rescue.rb exception catched # @@ -1616,7 +1616,7 @@ it will start to execute the subsequent as if nothing happened, but we can also make it retry from the `begin`. To do so, `retry` is used. -```TODO-lang +```ruby begin # the place to return .... rescue ArgumentError => err then @@ -1632,7 +1632,7 @@ If we want to catch more exception classes, we can just write them in line. When we want to handle different errors differently, we can specify several `rescue` clauses. -```TODO-lang +```ruby begin raise IOError, 'port not ready' rescue ArgumentError, TypeError @@ -1648,7 +1648,7 @@ For instance, only the clause of `IOError` will be executed in the above case. On the other hand, when there is an `else` clause, it is executed only when there is no exception. -```TODO-lang +```ruby begin nil # Of course here will no error occur rescue ArgumentError @@ -1661,7 +1661,7 @@ end Moreover an `ensure` clause will be executed in every case: when there is no exception, when there is an exception, rescued or not. -```TODO-lang +```ruby begin f = File.open('/etc/passwd') # do stuff @@ -1682,7 +1682,7 @@ The reason why the `ensure` is not counted is probably because Referring a variable or a constant. The value is the object the variable points to. We already talked in too much detail about the various behaviors. 
-```TODO-lang +```ruby lvar @ivar @@cvar @@ -1713,7 +1713,7 @@ Variable assignments are all performed by `=`. All variables are typeless. What is saved is a reference to an object. As its implementation, it was a `VALUE` (pointer). -```TODO-lang +```ruby var = 1 obj = Object.new @ivar = 'string' @@ -1727,13 +1727,13 @@ but a method call. ### Self Assignment -```TODO-lang +```ruby var += 1 ``` This syntax is also in C/C++/Java. In Ruby, -```TODO-lang +```ruby var = var + 1 ``` @@ -1747,7 +1747,7 @@ In Ruby `+=` is always defined as an operation of the combination of `+` and ass We can also combine self assignment and an attribute-access-flavor method. The result more looks like an attribute. -```TODO-lang +```ruby class C def i() @i end # A method definition can be written in one line. def i=(n) @i = n end @@ -1777,7 +1777,7 @@ so I've kept silent and decided to forget about it. `defined?` is a syntax of a quite different color in Ruby. It tells whether an expression value is "defined" or not at runtime. -```TODO-lang +```ruby var = 1 defined?(var) #=> true ``` @@ -1812,7 +1812,7 @@ But Ruby's statement ending's aren't that straightforward. First a statement can be ended explicitly with a semicolon as in C. Of course then we can write two and more statements in one line. -```TODO-lang +```ruby puts 'Hello, World!'; puts 'Hello, World once more!' ``` @@ -1821,7 +1821,7 @@ when the expression apparently continues, such as just after opened parentheses, dyadic operators, or a comma, the statement continues automatically. -```TODO-lang +```ruby # 1 + 3 * method(6, 7 + 8) 1 + 3 * @@ -1833,7 +1833,7 @@ the statement continues automatically. But it's also totally no problem to use a backslash to explicitly indicate the continuation. -```TODO-lang +```ruby p 1 + \ 2 ``` @@ -1843,7 +1843,7 @@ p 1 + \ The `if` modifier is an irregular version of the normal `if` The programs on the left and right mean exactly the same. 
-```TODO-lang +```ruby on_true() if cond if cond on_true() end @@ -1857,14 +1857,14 @@ be conveniently written with it. `while` and `until` also have a back notation. -```TODO-lang +```ruby process() while have_content? sleep(1) until ready? ``` Combining this with `begin` and `end` gives a `do`-`while`-loop like in C. -```TODO-lang +```ruby begin res = get_response(id) end while need_continue?(res) @@ -1872,7 +1872,7 @@ end while need_continue?(res) ### Class Definition -```TODO-lang +```ruby class C < SuperClass .... end @@ -1888,7 +1888,7 @@ image. ### Method Definition -```TODO-lang +```ruby def m(arg) end ``` @@ -1905,7 +1905,7 @@ to singleton classes. We define singleton methods by putting the receiver in front of the method name. Parameter declaration is done the same way like with ordinary methods. -```TODO-lang +```ruby def obj.some_method end @@ -1915,7 +1915,7 @@ end ### Definition of Singleton methods -```TODO-lang +```ruby class << obj .... end @@ -1929,7 +1929,7 @@ executed. In all over the Ruby program, this is the only place where a singleton class is exposed. -```TODO-lang +```ruby class << obj p self #=> #> # Singleton Class 「(obj)」 def a() end # def obj.a @@ -1942,13 +1942,13 @@ end With a multiple assignment, several assignments can be done all at once. The following is the simplest case: -```TODO-lang +```ruby a, b, c = 1, 2, 3 ``` It's exactly the same as the following. -```TODO-lang +```ruby a = 1 b = 2 c = 3 @@ -1958,7 +1958,7 @@ Just being concise is not interesting. in fact, when an array comes in to be mixed, it becomes something fun for the first time. -```TODO-lang +```ruby a, b, c = [1, 2, 3] ``` @@ -1967,7 +1967,7 @@ Furthermore, the right hand side does not need to be a grammatical list or a literal. It can also be a variable or a method call. -```TODO-lang +```ruby tmp = [1, 2, 3] a, b, c = tmp ret1, ret2 = some_method() # some_method might probably return several values @@ -1987,7 +1987,7 @@ are totally independent from each other. 
And it goes on, both the left and right hand side can be infinitely nested. -```TODO-lang +```ruby a, (b, c, d) = [1, [2, 3, 4]] a, (b, (c, d)) = [1, [2, [3, 4]]] (a, b), (c, d) = [[1, 2], [3, 4]] @@ -1998,7 +1998,7 @@ each line will be `a=1 b=2 c=3 d=4`. And it goes on. The left hand side can be index or parameter assignments. -```TODO-lang +```ruby i = 0 arr = [] arr[i], arr[i+1], arr[i+2] = 0, 2, 4 @@ -2010,7 +2010,7 @@ obj.attr0, obj.attr1, obj.attr2 = "a", "b", "c" And like with method parameters, `*` can be used to receive in a bundle. -```TODO-lang +```ruby first, *rest = 0, 1, 2, 3, 4 p first # 0 p rest # [1, 2, 3, 4] @@ -2024,7 +2024,7 @@ We brushed over block parameters when we were talking about iterators. But there is a deep relationship between them and multiple assignment. For instance in the following case. -```TODO-lang +```ruby array.each do |i| .... end @@ -2037,7 +2037,7 @@ But if there are two or more variables, it would a little more look like it. For instance, `Hash#each` is an repeated operation on the pairs of keys and values, so usually we call it like this: -```TODO-lang +```ruby hash.each do |key, value| .... end @@ -2048,7 +2048,7 @@ from the hash. Hence we can also does the following thing by using nested multiple assignment. -```TODO-lang +```ruby # [[key,value],index] are yielded hash.each_with_index do |(key, value), index| .... @@ -2057,7 +2057,7 @@ end ### `alias` -```TODO-lang +```ruby class C alias new orig end @@ -2073,7 +2073,7 @@ other one still remains with the same behavior. ### `undef` -```TODO-lang +```ruby class C undef method_name end @@ -2095,7 +2095,7 @@ is `Module#remove_method`. While defining a class, `self` refers to that class, we can call it as follows (Remember that `Class` is a subclass of `Module`.) -```TODO-lang +```ruby class C remove_method(:method_name) end @@ -2111,7 +2111,7 @@ Some more small topics ### Comments -```TODO-lang +```ruby # examples of bad comments. 1 + 1 # compute 1+1. 
alias my_id id # my_id is an alias of id. @@ -2122,7 +2122,7 @@ It doesn't have a meaning for the program. ### Embedded documents -```TODO-lang +```ruby =begin This is an embedded document. It's so called because it is embedded in the program. @@ -2147,7 +2147,7 @@ String literals, regular expressions and even operator names can contain multibyte characters. Hence it is possible to do something like this: -```TODO-lang +```ruby def 表示( arg ) puts arg end From 6119add4f86ce69a18d68ad224ce6c705aa7850b Mon Sep 17 00:00:00 2001 From: Ulysse Buonomo Date: Fri, 23 Sep 2022 14:03:39 +0200 Subject: [PATCH 13/14] Create CNAME --- CNAME | 1 + 1 file changed, 1 insertion(+) create mode 100644 CNAME diff --git a/CNAME b/CNAME new file mode 100644 index 0000000..48be225 --- /dev/null +++ b/CNAME @@ -0,0 +1 @@ +ruby-hacking-guide.ulysse.md \ No newline at end of file From 14c5e63c4bd14f4518a75f10583db7836b8dcae1 Mon Sep 17 00:00:00 2001 From: Ulysse Buonomo Date: Sat, 22 Oct 2022 15:25:10 +0200 Subject: [PATCH 14/14] Update CNAME --- CNAME | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CNAME b/CNAME index 48be225..69572fc 100644 --- a/CNAME +++ b/CNAME @@ -1 +1 @@ -ruby-hacking-guide.ulysse.md \ No newline at end of file +rubyhackingguide.ulysse.md \ No newline at end of file