]> git.uio.no Git - ifi-stolz-refaktor.git/blame - thesis/master-thesis-erlenkr.tex
Benchmark: removing some guard statements
[ifi-stolz-refaktor.git] / thesis / master-thesis-erlenkr.tex
CommitLineData
c8088eec 1\documentclass[USenglish,11pt]{ifimaster}
571ef294 2\usepackage{import}
7c28933b 3\usepackage[utf8]{inputenc}
9ff90080 4\usepackage[T1]{fontenc,url}
3510e539 5\usepackage{lmodern} % using Latin Modern to be able to use bold typewriter font
b4e539f7 6%\usepackage{mathpazo}
9ff90080 7\urlstyle{sf}
128adb4f 8\usepackage{listings}
4e468834 9\usepackage{tabularx}
d11bcf4d
EK
10\usepackage{tikz}
11\usepackage{tikz-qtree}
5e5908eb 12\usetikzlibrary{shapes,snakes,trees,arrows,shadows,positioning,calc}
ae138b9d
EK
13\usepackage{babel,textcomp,csquotes,ifimasterforside}
14
15\usepackage{varioref}
79d5c2a9 16\usepackage[hidelinks]{hyperref}
8b6b22c8 17\usepackage{cleveref}
b4a197fd 18\usepackage[xindy]{glossaries}
ae138b9d 19
fe0a4c48 20\usepackage[style=alphabetic,backend=biber]{biblatex}
12c254af 21\usepackage{amsthm}
3471cd15 22\usepackage{mathtools}
0ab2e134
EK
23\usepackage{graphicx}
24% use 'disable' before printing:
25\usepackage[]{todonotes}
0d7fbd88
EK
26\usepackage{xspace}
27\usepackage{he-she}
b289552b 28\usepackage{verbatim}
ddcea0b5 29\usepackage{minted}
347ed677 30\usepackage{multicol}
ddcea0b5 31\usemintedstyle{bw}
8fae7b44
EK
32\usepackage{perpage} %the perpage package
33\MakePerPage{footnote} %the perpage package command
9ff90080 34
6c51af15 35\theoremstyle{definition}
12c254af 36\newtheorem*{wordDef}{Definition}
3471cd15 37\newtheorem*{theorem}{Theorem}
12c254af 38
ddcea0b5
EK
39\graphicspath{ {./figures/} }
40
8b6b22c8 41\newcommand{\citing}[1]{~\cite{#1}}
e123ab03
EK
42%\newcommand{\myref}[1]{\cref{#1} on \cpageref{#1}}
43\newcommand{\myref}[1]{\vref{#1}}
4306ef44 44\newcommand{\Myref}[1]{\Vref{#1}}
8b6b22c8 45
b4a197fd 46%\newcommand{\glossref}[1]{\textsuperscript{(\glsrefentry{#1})}}
20bcc7bf
EK
47%\newcommand{\gloss}[1]{\gls{#1}\glossref{#1}}
48%\newcommand{\glosspl}[1]{\glspl{#1}\glossref{#1}}
49\newcommand{\gloss}[1]{\gls{#1}}
50\newcommand{\glosspl}[1]{\glspl{#1}}
fe0a4c48 51
12c254af 52\newcommand{\definition}[1]{\begin{wordDef}#1\end{wordDef}}
8b6b22c8 53\newcommand{\see}[1]{(see \myref{#1})}
137e0e7b
EK
54\newcommand{\explanation}[3]{\noindent\textbf{\textit{#1}}\\*\emph{When:}
55#2\\*\emph{How:} #3\\*[-7px]}
4e135659 56
128adb4f 57%\newcommand{\type}[1]{\lstinline{#1}}
03674629
EK
58\newcommand{\code}[1]{\texttt{\textbf{#1}}}
59\newcommand{\type}[1]{\code{#1}}
f041551b
EK
60\newcommand{\typeref}[1]{\footnote{\type{#1}}}
61\newcommand{\typewithref}[2]{\type{#2}\typeref{#1.#2}}
62\newcommand{\method}[1]{\type{#1}}
63\newcommand{\methodref}[2]{\footnote{\type{#1}\method{\##2()}}}
64\newcommand{\methodwithref}[2]{\method{#2}\footnote{\type{#1}\method{\##2()}}}
3510e539 65\newcommand{\var}[1]{\type{#1}}
9ff90080 66
fe0a4c48
EK
67\newcommand{\name}[1]{#1}
68\newcommand{\tit}[1]{\emph{#1}}
69\newcommand{\refa}[1]{\emph{#1}}
70\newcommand{\pattern}[1]{\emph{#1}}
71\newcommand{\metr}[1]{\emph{#1}}
72\newcommand{\ExtractMethod}{\refa{Extract Method}\xspace}
73\newcommand{\MoveMethod}{\refa{Move Method}\xspace}
74\newcommand{\ExtractAndMoveMethod}{\refa{Extract and Move Method}\xspace}
b5c7bb1b 75
1c521a77 76\newcommand{\m}[1]{$#1$}
46f52f52 77
7125ceac 78\newcommand\todoin[2][]{\todo[inline, caption={#2}, #1]{
4e135659
EK
79\begin{minipage}{\textwidth-4pt}#2\end{minipage}}}
80
cbc3b044
EK
81\title{Automated Composition of Refactorings}
82\subtitle{Composing the Extract and Move Method refactorings in Eclipse}
7c28933b
EK
83\author{Erlend Kristiansen}
84
fe0a4c48
EK
85\makeglossaries
86\newglossaryentry{profiling}
87{
88 name=profiling,
89 description={is to run a computer program through a profiler/with a profiler
90 attached}
91}
20bcc7bf
EK
92\newglossaryentry{profiler}
93{
94 name=profiler,
95 description={A profiler is a program for analyzing performance within an
96 application. It is used to analyze memory consumption, processing time and
60065669 97frequency of procedure calls and such}
20bcc7bf
EK
98}
99\newglossaryentry{xUnit}
100{
101 name={xUnit framework},
102 description={An xUnit framework is a framework for writing unit tests for a
103 computer program. It follows the patterns known from the JUnit framework for
104 Java\citing{fowlerXunit}
105 },
106 plural={xUnit frameworks}
107}
fe0a4c48
EK
108\newglossaryentry{softwareObfuscation}
109{
110 name={software obfuscation},
111 description={makes source code harder to read and analyze, while preserving
112 its semantics}
113}
114\newglossaryentry{extractClass}
115{
116 name=\refa{Extract Class},
117 description={The \refa{Extract Class} refactoring works by creating a class,
118for then to move members from another class to that class and access them from
119the old class via a reference to the new class}
120}
121\newglossaryentry{designPattern}
122{
123 name={design pattern},
124 description={A design pattern is a named abstraction, that is meant to solve a
125 general design problem. It describes the key aspects of a common problem and
126identifies its participators and how they collaborate},
127 plural={design patterns}
128}
1c521a77
EK
129\newglossaryentry{enclosingClass}
130{
131 name={enclosing class},
132 description={An enclosing class is the class that surrounds any specific piece
133 of code that is written in the inner scope of this class},
134}
b4a197fd
EK
135%\newglossaryentry{extractMethod}
136%{
137% name=\refa{Extract Method},
138% description={The \refa{Extract Method} refactoring is used to extract a
139%fragment of code from its context and into a new method. A call to the new
140%method is inlined where the fragment was before. It is used to break code into
141%logical units, with names that explain their purpose}
142%}
143%\newglossaryentry{moveMethod}
144%{
145% name=\refa{Move Method},
146% description={The \refa{Move Method} refactoring is used to move a method from
147% one class to another. This is useful if the method is using more features of
148% another class than of the class which it is currently defined. Then all calls
149% to this method must be updated, or the method must be copied, with the old
150%method delegating to the new method}
151%}
f5fb40e4 152
7c28933b 153\bibliography{bibliography/master-thesis-erlenkr-bibliography}
9ff90080 154
c8088eec
EK
155% UML comment in TikZ:
156% ref: https://tex.stackexchange.com/questions/103688/folded-paper-shape-tikz
43ca7be4
EK
157\makeatletter
158\pgfdeclareshape{umlcomment}{
159 \inheritsavedanchors[from=rectangle] % this is nearly a rectangle
160 \inheritanchorborder[from=rectangle]
161 \inheritanchor[from=rectangle]{center}
162 \inheritanchor[from=rectangle]{north}
163 \inheritanchor[from=rectangle]{south}
164 \inheritanchor[from=rectangle]{west}
165 \inheritanchor[from=rectangle]{east}
166 % ... and possibly more
167 \backgroundpath{% this is new
168 % store lower right in xa/ya and upper right in xb/yb
169 \southwest \pgf@xa=\pgf@x \pgf@ya=\pgf@y
170 \northeast \pgf@xb=\pgf@x \pgf@yb=\pgf@y
171 % compute corner of ‘‘flipped page’’
172 \pgf@xc=\pgf@xb \advance\pgf@xc by-10pt % this should be a parameter
173 \pgf@yc=\pgf@yb \advance\pgf@yc by-10pt
174 % construct main path
175 \pgfpathmoveto{\pgfpoint{\pgf@xa}{\pgf@ya}}
176 \pgfpathlineto{\pgfpoint{\pgf@xa}{\pgf@yb}}
177 \pgfpathlineto{\pgfpoint{\pgf@xc}{\pgf@yb}}
178 \pgfpathlineto{\pgfpoint{\pgf@xb}{\pgf@yc}}
179 \pgfpathlineto{\pgfpoint{\pgf@xb}{\pgf@ya}}
180 \pgfpathclose
181 % add little corner
182 \pgfpathmoveto{\pgfpoint{\pgf@xc}{\pgf@yb}}
183 \pgfpathlineto{\pgfpoint{\pgf@xc}{\pgf@yc}}
184 \pgfpathlineto{\pgfpoint{\pgf@xb}{\pgf@yc}}
185 \pgfpathlineto{\pgfpoint{\pgf@xc}{\pgf@yc}}
186 }
187}
188\makeatother
189
190\tikzstyle{comment}=[%
191 draw,
192 drop shadow,
193 fill=white,
194 align=center,
195 shape=document,
196 minimum width=20mm,
197 minimum height=10mm,
198 shape=umlcomment,
199 inner sep=2ex,
200 font=\ttfamily,
201]
202
f5fb40e4
EK
203%\interfootnotelinepenalty=10000
204
9ff90080 205\begin{document}
fe0a4c48 206\pagenumbering{roman}
531c4132 207\ififorside
9ff90080 208\frontmatter{}
9ff90080
EK
209
210
211\chapter*{Abstract}
064227e3
EK
212\todoin{\textbf{Remove all todos (including list) before delivery/printing!!!
213Can be done by removing ``draft'' from documentclass.}}
889ba93e 214\todoin{Write abstract}
9ff90080
EK
215
216\tableofcontents{}
217\listoffigures{}
218\listoftables{}
219
220\chapter*{Preface}
221
d1adbeef
EK
222The discussions in this report must be seen in the context of object oriented
223programming languages, and Java in particular, since that is the language in
224which most of the examples will be given. All though the techniques discussed
225may be applicable to languages from other paradigms, they will not be the
226subject of this report.
f3a108c3 227
055dca93 228\mainmatter
00aa0588 229
740e1b6c 230\chapter{What is Refactoring?}
7c28933b 231
f3a108c3
EK
232This question is best answered by first defining the concept of a
233\emph{refactoring}, what it is to \emph{refactor}, and then discuss what aspects
a1bafe90 234of programming make people want to refactor their code.
00aa0588 235
740e1b6c 236\section{Defining refactoring}
a1bafe90 237Martin Fowler, in his classic book on refactoring\citing{refactoring}, defines a
00aa0588 238refactoring like this:
ee45c41f 239
00aa0588 240\begin{quote}
aa1e3779
EK
241 \emph{Refactoring} (noun): a change made to the internal
242 structure\footnote{The structure observable by the programmer.} of software to
243 make it easier to understand and cheaper to modify without changing its
244 observable behavior.~\cite[p.~53]{refactoring}
00aa0588 245\end{quote}
ee45c41f 246
a1bafe90 247\noindent This definition assigns additional meaning to the word
ee45c41f
EK
248\emph{refactoring}, beyond the composition of the prefix \emph{re-}, usually
249meaning something like ``again'' or ``anew'', and the word \emph{factoring},
6c51af15
EK
250that can mean to isolate the \emph{factors} of something. Here a \emph{factor}
251would be close to the mathematical definition of something that divides a
252quantity, without leaving a remainder. Fowler is mixing the \emph{motivation}
a1bafe90
EK
253behind refactoring into his definition. Instead it could be more refined, formed
254to only consider the \emph{mechanical} and \emph{behavioral} aspects of
255refactoring. That is to factor the program again, putting it together in a
256different way than before, while preserving the behavior of the program. An
257alternative definition could then be:
6c51af15
EK
258
259\definition{A \emph{refactoring} is a transformation
8fae7b44 260done to a program without altering its external behavior.}
00aa0588 261
ee1d883a
EK
262From this we can conclude that a refactoring primarily changes how the
263\emph{code} of a program is perceived by the \emph{programmer}, and not the
740e1b6c
EK
264\emph{behavior} experienced by any user of the program. Although the logical
265meaning is preserved, such changes could potentially alter the program's
266behavior when it comes to performance gain or -penalties. So any logic depending
267on the performance of a program could make the program behave differently after
268a refactoring.
00aa0588 269
fe0a4c48
EK
270In the extreme case one could argue that \gloss{softwareObfuscation} is
271refactoring. It is often used to protect proprietary software. It restrains
272uninvited viewers, so they have a hard time analyzing code that they are not
273supposed to know how works. This could be a problem when using a language that
274is possible to decompile, such as Java.
b4e539f7
EK
275
276Obfuscation could be done composing many, more or less randomly chosen,
277refactorings. Then the question arises whether it can be called a
278\emph{composite refactoring} or not \see{compositeRefactorings}? The answer is
279not obvious. First, there is no way to describe the mechanics of software
280obfuscation, because there are infinitely many ways to do that. Second,
281obfuscation can be thought of as \emph{one operation}: Either the code is
282obfuscated, or it is not. Third, it makes no sense to call software obfuscation
283\emph{a refactoring}, since it holds different meaning to different people.
284
285This last point is important, since one of the motivations behind defining
286different refactorings, is to establish a \emph{vocabulary} for software
287professionals to use when reasoning about and discussing programs, similar to
fe0a4c48 288the motivation behind \glosspl{designPattern}\citing{designPatterns}.
b4e539f7
EK
289\begin{comment}
290So for describing \emph{software obfuscation}, it might be more appropriate to
291define what you do when performing it rather than precisely defining its
292mechanics in terms of other refactorings.
293\end{comment}
00aa0588 294
740e1b6c 295\section{The etymology of 'refactoring'}
f3a108c3
EK
296It is a little difficult to pinpoint the exact origin of the word
297``refactoring'', as it seems to have evolved as part of a colloquial
298terminology, more than a scientific term. There is no authoritative source for a
299formal definition of it.
300
b5c7bb1b 301According to Martin Fowler\citing{etymology-refactoring}, there may also be more
f3a108c3 302than one origin of the word. The most well-known source, when it comes to the
b4e539f7
EK
303origin of \emph{refactoring}, is the
304Smalltalk\footnote{\label{footNote}Programming language} community and their
fe0a4c48 305infamous \name{Refactoring
f3a108c3 306Browser}\footnote{\url{http://st-www.cs.illinois.edu/users/brant/Refactory/RefactoringBrowser.html}}
fe0a4c48 307described in the article \tit{A Refactoring Tool for
b5c7bb1b
EK
308Smalltalk}\citing{refactoringBrowser1997}, published in 1997.
309Allegedly\citing{etymology-refactoring}, the metaphor of factoring programs was
b4e539f7 310also present in the Forth\textsuperscript{\ref{footNote}} community, and the
fe0a4c48
EK
311word ``refactoring'' is mentioned in a book by Leo Brodie, called \tit{Thinking
312Forth}\citing{brodie2004}, first published in 1984\footnote{\tit{Thinking Forth}
313was first published in 1984 by the \name{Forth Interest Group}. Then it was
314reprinted in 1994 with minor typographical corrections, before it was
b4e539f7
EK
315transcribed into an electronic edition typeset in \LaTeX\ and published under a
316Creative Commons licence in
3172004. The edition cited here is the 2004 edition, but the content should
318essentially be as in 1984.}. The exact word is only printed one
319place~\cite[p.~232]{brodie2004}, but the term \emph{factoring} is prominent in
320the book, that also contains a whole chapter dedicated to (re)factoring, and how
321to keep the (Forth) code clean and maintainable.
ee45c41f 322
f3a108c3
EK
323\begin{quote}
324 \ldots good factoring technique is perhaps the most important skill for a
3a154bb7 325 Forth programmer.~\cite[p.~172]{brodie2004}
f3a108c3 326\end{quote}
ee45c41f
EK
327
328\noindent Brodie also express what \emph{factoring} means to him:
329
f3a108c3
EK
330\begin{quote}
331 Factoring means organizing code into useful fragments. To make a fragment
332 useful, you often must separate reusable parts from non-reusable parts. The
333 reusable parts become new definitions. The non-reusable parts become arguments
3a154bb7 334 or parameters to the definitions.~\cite[p.~172]{brodie2004}
f3a108c3
EK
335\end{quote}
336
337Fowler claims that the usage of the word \emph{refactoring} did not pass between
fe0a4c48 338the \name{Forth} and \name{Smalltalk} communities, but that it emerged
f3a108c3
EK
339independently in each of the communities.
340
740e1b6c 341\section{Motivation -- Why people refactor}
2f6e6dec
EK
342There are many reasons why people want to refactor their programs. They can for
343instance do it to remove duplication, break up long methods or to introduce
b4e539f7
EK
344design patterns into their software systems. The shared trait for all these are
345that peoples' intentions are to make their programs \emph{better}, in some
346sense. But what aspects of their programs are becoming improved?
347
348As just mentioned, people often refactor to get rid of duplication. They are
349moving identical or similar code into methods, and are pushing methods up or
350down in their class hierarchies. They are making template methods for
351overlapping algorithms/functionality, and so on. It is all about gathering what
352belongs together and putting it all in one place. The resulting code is then
353easier to maintain. When removing the implicit coupling\footnote{When
354 duplicating code, the duplicate pieces of code might not be coupled, apart
355from representing the same functionality. So if this functionality is going to
356change, it might need to change in more than one place, thus creating an
357implicit coupling between multiple pieces of code.} between code snippets, the
137e0e7b 358location of a bug is limited to only one place, and new functionality need only
a1bafe90
EK
359to be added to this one place, instead of a number of places people might not
360even remember.
361
362A problem you often encounter when programming, is that a program contains a lot
363of long and hard-to-grasp methods. It can then help to break the methods into
b4a197fd
EK
364smaller ones, using the \ExtractMethod refactoring\citing{refactoring}. Then
365you may discover something about a program that you were not aware of before;
366revealing bugs you did not know about or could not find due to the complex
367structure of your program. \todo{Proof?} Making the methods smaller and giving
368good names to the new ones clarifies the algorithms and enhances the
a1bafe90
EK
369\emph{understandability} of the program \see{magic_number_seven}. This makes
370refactoring an excellent method for exploring unknown program code, or code that
371you had forgotten that you wrote.
372
b4e539f7
EK
373Most primitive refactorings are simple, and usually involves moving code
374around\citing{kerievsky2005}. The motivation behind them may first be revealed
375when they are combined into larger --- higher level --- refactorings, called
a1bafe90 376\emph{composite refactorings} \see{compositeRefactorings}. Often the goal of
b4e539f7
EK
377such a series of refactorings is a design pattern. Thus the design can
378\emph{evolve} throughout the lifetime of a program, as opposed to designing
379up-front. It is all about being structured and taking small steps to improve a
380program's design.
a1bafe90
EK
381
382Many software design pattern are aimed at lowering the coupling between
383different classes and different layers of logic. One of the most famous is
fe0a4c48
EK
384perhaps the \pattern{Model-View-Controller}\citing{designPatterns} pattern. It
385is aimed at lowering the coupling between the user interface, the business logic
b4e539f7
EK
386and the data representation of a program. This also has the added benefit that
387the business logic could much easier be the target of automated tests, thus
388increasing the productivity in the software development process.
0b0567f2
EK
389
390Another effect of refactoring is that with the increased separation of concerns
a1bafe90
EK
391coming out of many refactorings, the \emph{performance} can be improved. When
392profiling programs, the problematic parts are narrowed down to smaller parts of
393the code, which are easier to tune, and optimization can be performed only where
b4e539f7 394needed and in a more effective way\citing{refactoring}.
137e0e7b 395
b01d328a
EK
396Last, but not least, and this should probably be the best reason to refactor, is
397to refactor to \emph{facilitate a program change}. If one has managed to keep
398one's code clean and tidy, and the code is not bloated with design patterns that
a1bafe90 399are not ever going to be needed, then some refactoring might be needed to
b01d328a
EK
400introduce a design pattern that is appropriate for the change that is going to
401happen.
402
137e0e7b
EK
403Refactoring program code --- with a goal in mind --- can give the code itself
404more value. That is in the form of robustness to bugs, understandability and
a1bafe90
EK
405maintainability. Having robust code is an obvious advantage, but
406understandability and maintainability are both very important aspects of
407software development. By incorporating refactoring in the development process,
408bugs are found faster, new functionality is added more easily and code is easier
409to understand by the next person exposed to it, which might as well be the
410person who wrote it. The consequence of this, is that refactoring can increase
411the average productivity of the development process, and thus also add to the
412monetary value of a business in the long run. The perspective on productivity
413and money should also be able to open the eyes of the many nearsighted managers
414that seldom see beyond the next milestone.
137e0e7b 415
b01d328a 416\section{The magical number seven}\label{magic_number_seven}
fe0a4c48
EK
417The article \tit{The magical number seven, plus or minus two: some limits on our
418capacity for processing information}\citing{miller1956} by George A. Miller,
419was published in the journal \name{Psychological Review} in 1956. It presents
420evidence that support that the capacity of the number of objects a human being
421can hold in its working memory is roughly seven, plus or minus two objects. This
422number varies a bit depending on the nature and complexity of the objects, but
423is according to Miller ``\ldots never changing so much as to be
f4cea2d6
EK
424unrecognizable.''
425
426Miller's article culminates in the section called \emph{Recoding}, a term he
427borrows from communication theory. The central result in this section is that by
428recoding information, the capacity of the amount of information that a human can
429process at a time is increased. By \emph{recoding}, Miller means to group
b4e539f7
EK
430objects together in chunks, and give each chunk a new name that it can be
431remembered by.
f4cea2d6
EK
432
433\begin{quote}
434 \ldots recoding is an extremely powerful weapon for increasing the amount of
4cb06723 435 information that we can deal with.~\cite[p.~95]{miller1956}
f4cea2d6 436\end{quote}
ee45c41f 437
b4e539f7
EK
438By organizing objects into patterns of ever growing depth, one can memorize and
439process a much larger amount of data than if it were to be represented as its
440basic pieces. This grouping and renaming is analogous to how many refactorings
441work, by grouping pieces of code and give them a new name. Examples are the
fe0a4c48 442fundamental \ExtractMethod and \refa{Extract Class}
b4e539f7
EK
443refactorings\citing{refactoring}.
444
a1bafe90 445An example from the article addresses the problem of memorizing a sequence of
b4e539f7
EK
446binary digits. The example presented here is a slightly modified version of the
447one presented in the original article\citing{miller1956}, but it preserves the
3ab3e132 448essence of it. Let us say we have the following sequence of
f4cea2d6
EK
44916 binary digits: ``1010001001110011''. Most of us will have a hard time
450memorizing this sequence by only reading it once or twice. Imagine if we instead
451translate it to this sequence: ``A273''. If you have a background from computer
b4e539f7
EK
452science, it will be obvious that the latter sequence is the first sequence
453recoded to be represented by digits in base 16. Most people should be able to
f4cea2d6
EK
454memorize this last sequence by only looking at it once.
455
456Another result from the Miller article is that when the amount of information a
457human must interpret increases, it is crucial that the translation from one code
458to another must be almost automatic for the subject to be able to remember the
0d7fbd88 459translation, before \heshe is presented with new information to recode. Thus
f4cea2d6
EK
460learning and understanding how to best organize certain kinds of data is
461essential to efficiently handle that kind of data in the future. This is much
a1bafe90
EK
462like when humans learn to read. First they must learn how to recognize letters.
463Then they can learn distinct words, and later read sequences of words that form
464whole sentences. Eventually, most of them will be able to read whole books and
465briefly retell the important parts of its content. This suggest that the use of
b4e539f7
EK
466design patterns is a good idea when reasoning about computer programs. With
467extensive use of design patterns when creating complex program structures, one
468does not always have to read whole classes of code to comprehend how they
469function, it may be sufficient to only see the name of a class to almost fully
470understand its responsibilities.
f4cea2d6
EK
471
472\begin{quote}
473 Our language is tremendously useful for repackaging material into a few chunks
4cb06723 474 rich in information.~\cite[p.~95]{miller1956}
f4cea2d6 475\end{quote}
ee45c41f 476
a1bafe90 477Without further evidence, these results at least indicate that refactoring
f4cea2d6
EK
478source code into smaller units with higher cohesion and, when needed,
479introducing appropriate design patterns, should aid in the cause of creating
b4e539f7 480computer programs that are easier to maintain and have code that is easier (and
f4cea2d6
EK
481better) understood.
482
740e1b6c 483\section{Notable contributions to the refactoring literature}
bd94b131 484\todoin{Thinking Forth?}
36d99783 485
d21ef41f
EK
486\begin{description}
487 \item[1992] William F. Opdyke submits his doctoral dissertation called
fe0a4c48
EK
488 \tit{Refactoring Object-Oriented Frameworks}\citing{opdyke1992}. This work
489 defines a set of refactorings, that are behavior preserving given that their
490 preconditions are met. The dissertation is focused on the automation of
491 refactorings.
492 \item[1999] Martin Fowler et al.: \tit{Refactoring: Improving the Design of
b5c7bb1b
EK
493 Existing Code}\citing{refactoring}. This is maybe the most influential text
494 on refactoring. It bares similarities with Opdykes thesis\citing{opdyke1992}
d21ef41f
EK
495 in the way that it provides a catalog of refactorings. But Fowler's book is
496 more about the craft of refactoring, as he focuses on establishing a
497 vocabulary for refactoring, together with the mechanics of different
498 refactorings and when to perform them. His methodology is also founded on
fe0a4c48
EK
499 the principles of test-driven development.
500 \item[2005] Joshua Kerievsky: \tit{Refactoring to
36d99783 501 Patterns}\citing{kerievsky2005}. This book is heavily influenced by Fowler's
fe0a4c48 502 \tit{Refactoring}\citing{refactoring} and the ``Gang of Four'' \tit{Design
36d99783
EK
503 Patterns}\citing{designPatterns}. It is building on the refactoring
504 catalogue from Fowler's book, but is trying to bridge the gap between
505 \emph{refactoring} and \emph{design patterns} by providing a series of
506 higher-level composite refactorings, that makes code evolve toward or away
fe0a4c48 507 from certain design patterns. The book is trying to build up the reader's
36d99783
EK
508 intuition around \emph{why} one would want to use a particular design
509 pattern, and not just \emph{how}. The book is encouraging evolutionary
e123ab03 510 design \see{relationToDesignPatterns}.
d21ef41f 511\end{description}
3b7c1d90 512
110dae91 513\section{Tool support (for Java)}\label{toolSupport}
3ab3e132 514This section will briefly compare the refactoring support of the three IDEs
fe0a4c48
EK
515\name{Eclipse}\footnote{\url{http://www.eclipse.org/}}, \name{IntelliJ
516IDEA}\footnote{The IDE under comparison is the \name{Community Edition},
4e135659 517\url{http://www.jetbrains.com/idea/}} and
fe0a4c48 518\name{NetBeans}\footnote{\url{https://netbeans.org/}}. These are the most
4e135659
EK
519popular Java IDEs\citing{javaReport2011}.
520
521All three IDEs provide support for the most useful refactorings, like the
522different extract, move and rename refactorings. In fact, Java-targeted IDEs are
523known for their good refactoring support, so this did not appear as a big
524surprise.
525
526The IDEs seem to have excellent support for the \ExtractMethod refactoring, so
b4e539f7
EK
527at least they have all passed the first ``refactoring
528rubicon''\citing{fowlerRubicon2001,secondRubicon2012}.
4e135659 529
b4a197fd
EK
530Regarding the \MoveMethod refactoring, the \name{Eclipse} and \name{IntelliJ}
531IDEs do the job in very similar manners. In most situations they both do a
532satisfying job by producing the expected outcome. But they do nothing to check
533that the result does not break the semantics of the program \see{correctness}.
fe0a4c48 534The \name{NetBeans} IDE implements this refactoring in a somewhat
b4e539f7
EK
535unsophisticated way. For starters, the refactoring's default destination for the
536move, is the same class as the method already resides in, although it refuses to
537perform the refactoring if chosen. But the worst part is, that if moving the
538method \method{f} of the class \type{C} to the class \type{X}, it will break the
539code. The result is shown in \myref{lst:moveMethod_NetBeans}.
4e135659 540
347ed677
EK
541\begin{listing}
542\begin{multicols}{2}
4e135659
EK
543\begin{minted}[samepage]{java}
544public class C {
545 private X x;
546 ...
547 public void f() {
548 x.m();
549 x.n();
550 }
551}
552\end{minted}
553
347ed677 554\columnbreak
4e135659
EK
555
556\begin{minted}[samepage]{java}
557public class X {
558 ...
559 public void f(C c) {
560 c.x.m();
561 c.x.n();
562 }
563}
564\end{minted}
347ed677
EK
565\end{multicols}
566\caption{Moving method \method{f} from \type{C} to \type{X}.}
567\label{lst:moveMethod_NetBeans}
568\end{listing}
4e135659 569
fe0a4c48 570\name{NetBeans} will try to create code that call the methods \method{m} and \method{n}
4e135659 571of \type{X} by accessing them through \var{c.x}, where \var{c} is a parameter of
a1bafe90
EK
572type \type{C} that is added the method \method{f} when it is moved. (This is
573seldom the desired outcome of this refactoring, but ironically, this ``feature''
fe0a4c48 574keeps \name{NetBeans} from breaking the code in the example from \myref{correctness}.)
8b6b22c8 575If \var{c.x} for some reason is inaccessible to \type{X}, as in this case, the
fe0a4c48 576refactoring breaks the code, and it will not compile. \name{NetBeans} presents a
8b6b22c8
EK
577preview of the refactoring outcome, but the preview does not catch it if the IDE
578is about break the program.
4778044b 579
b4e539f7 580The IDEs under investigation seem to have fairly good support for primitive
fe0a4c48
EK
581refactorings, but what about more complex ones, such as
582\gloss{extractClass}\citing{refactoring}? \name{IntelliJ} handles this in a
583fairly good manner, although, in the case of private methods, it leaves unused
a1bafe90 584methods behind. These are methods that delegate to a field with the type of the
fe0a4c48
EK
585new class, but are not used anywhere. \name{Eclipse} has added its own quirk to
586the \refa{Extract Class} refactoring, and only allows for \emph{fields} to be
587moved to a new class, \emph{not methods}. This makes it effectively only
588extracting a data structure, and calling it \refa{Extract Class} is a little
b4e539f7 589misleading. One would often be better off with textual extract and paste than
fe0a4c48
EK
590using the \refa{Extract Class} refactoring in \name{Eclipse}. When it comes to
591\name{NetBeans}, it does not even show an attempt on providing this refactoring.
4778044b 592
36d99783 593\section{The relation to design patterns}\label{relationToDesignPatterns}
4cb06723 594
fe0a4c48
EK
595Refactoring and design patterns have at least one thing in common, they are both
596promoted by advocates of \emph{clean code}\citing{cleanCode} as fundamental
597tools on the road to more maintainable and extendable source code.
4cb06723
EK
598
599\begin{quote}
600 Design patterns help you determine how to reorganize a design, and they can
601 reduce the amount of refactoring you need to do
602 later.~\cite[p.~353]{designPatterns}
603\end{quote}
604
a1bafe90
EK
605Although sometimes associated with
606over-engineering\citing{kerievsky2005,refactoring}, design patterns are in
607general assumed to be good for maintainability of source code. That may be
d1adbeef
EK
608because many of them are designed to support the \emph{open/closed principle} of
609object-oriented programming. The principle was first formulated by Bertrand
610Meyer, the creator of the Eiffel programming language, like this: ``Modules
611should be both open and closed.''\citing{meyer1988} It has been popularized,
612with this as a common version:
613
614\begin{quote}
615 Software entities (classes, modules, functions, etc.) should be open for
616 extension, but closed for modification.\footnote{See
617 \url{http://c2.com/cgi/wiki?OpenClosedPrinciple} or
618 \url{https://en.wikipedia.org/wiki/Open/closed_principle}}
619\end{quote}
620
621Maintainability is often thought of as the ability to be able to introduce new
a1bafe90 622functionality without having to change too much of the old code. When
d1adbeef
EK
623refactoring, the motivation is often to facilitate adding new functionality. It
624is about factoring the old code in a way that makes the new functionality being
625able to benefit from the functionality already residing in a software system,
626without having to copy old code into new. Then, next time someone shall add new
a1bafe90
EK
627functionality, it is less likely that the old code has to change. Assuming that
628a design pattern is the best way to get rid of duplication and assist in
629implementing new functionality, it is reasonable to conclude that a design
630pattern often is the target of a series of refactorings. Having a repertoire of
631design patterns can also help in knowing when and how to refactor a program to
632make it reflect certain desired characteristics.
d1adbeef
EK
633
634\begin{quote}
a1bafe90 635 There is a natural relation between patterns and refactorings. Patterns are
d1adbeef
EK
636 where you want to be; refactorings are ways to get there from somewhere
637 else.~\cite[p.~107]{refactoring}
638\end{quote}
639
640This quote is wise in many contexts, but it is not always appropriate to say
a1bafe90
EK
641``Patterns are where you want to be\ldots''. \emph{Sometimes}, patterns are
642where you want to be, but only because it will benefit your design. It is not
643true that one should always try to incorporate as many design patterns as
644possible into a program. It is not like they have intrinsic value. They only add
645value to a system when they support its design. Otherwise, the use of design
646patterns may only lead to a program that is more complex than necessary.
d1adbeef
EK
647
648\begin{quote}
649 The overuse of patterns tends to result from being patterns happy. We are
650 \emph{patterns happy} when we become so enamored of patterns that we simply
651 must use them in our code.~\cite[p.~24]{kerievsky2005}
652\end{quote}
653
654This can easily happen when relying largely on up-front design. Then it is
655natural, in the very beginning, to try to build in all the flexibility that one
656believes will be necessary throughout the lifetime of a software system.
657According to Joshua Kerievsky ``That sounds reasonable --- if you happen to be
658psychic.''~\cite[p.~1]{kerievsky2005} He is advocating what he believes is a
659better approach: To let software continually evolve. To start with a simple
660design that meets today's needs, and tackle future needs by refactoring to
661satisfy them. He believes that this is a more economic approach than investing
662time and money into a design that inevitably is going to change. By relying on
663continuously refactoring a system, its design can be made simpler without
664sacrificing flexibility. To be able to fully rely on this approach, it is of
e123ab03 665utter importance to have a reliable suit of tests to lean on \see{testing}. This
d1adbeef
EK
666makes the design process more natural and less characterized by difficult
667decisions that has to be made before proceeding in the process, and that is
668going to define a project for all of its unforeseeable future.
669
b289552b
EK
670\begin{comment}
671
137e0e7b
EK
672\section{Classification of refactorings}
673% only interesting refactorings
674% with 2 detailed examples? One for structured and one for intra-method?
675% Is replacing Bubblesort with Quick Sort considered a refactoring?
676
677\subsection{Structural refactorings}
678
f65da046 679\subsubsection{Primitive refactorings}
137e0e7b
EK
680
681% Composing Methods
682\explanation{Extract Method}{You have a code fragment that can be grouped
683together.}{Turn the fragment into a method whose name explains the purpose of
684the method.}
685
686\explanation{Inline Method}{A method's body is just as clear as its name.}{Put
687the method's body into the body of its callers and remove the method.}
688
689\explanation{Inline Temp}{You have a temp that is assigned to once with a simple
690expression, and the temp is getting in the way of other refactorings.}{Replace
691all references to that temp with the expression}
692
693% Moving Features Between Objects
694\explanation{Move Method}{A method is, or will be, using or used by more
695features of another class than the class on which it is defined.}{Create a new
696method with a similar body in the class it uses most. Either turn the old method
697into a simple delegation, or remove it altogether.}
698
699\explanation{Move Field}{A field is, or will be, used by another class more than
700the class on which it is defined}{Create a new field in the target class, and
701change all its users.}
702
703% Organizing Data
704\explanation{Replace Magic Number with Symbolic Constant}{You have a literal
705number with a particular meaning.}{Create a constant, name it after the meaning,
706and replace the number with it.}
707
708\explanation{Encapsulate Field}{There is a public field.}{Make it private and
709provide accessors.}
710
711\explanation{Replace Type Code with Class}{A class has a numeric type code that
8fae7b44 712does not affect its behavior.}{Replace the number with a new class.}
137e0e7b
EK
713
714\explanation{Replace Type Code with Subclasses}{You have an immutable type code
8fae7b44 715that affects the behavior of a class.}{Replace the type code with subclasses.}
137e0e7b
EK
716
717\explanation{Replace Type Code with State/Strategy}{You have a type code that
8fae7b44 718affects the behavior of a class, but you cannot use subclassing.}{Replace the
137e0e7b
EK
719type code with a state object.}
720
721% Simplifying Conditional Expressions
722\explanation{Consolidate Duplicate Conditional Fragments}{The same fragment of
8fae7b44 723code is in all branches of a conditional expression.}{Move it outside of the
137e0e7b
EK
724expression.}
725
726\explanation{Remove Control Flag}{You have a variable that is acting as a
727control flag fro a series of boolean expressions.}{Use a break or return
728instead.}
729
730\explanation{Replace Nested Conditional with Guard Clauses}{A method has
8fae7b44 731conditional behavior that does not make clear the normal path of
137e0e7b
EK
732execution.}{Use guard clauses for all special cases.}
733
8fae7b44 734\explanation{Introduce Null Object}{You have repeated checks for a null
137e0e7b
EK
735value.}{Replace the null value with a null object.}
736
737\explanation{Introduce Assertion}{A section of code assumes something about the
738state of the program.}{Make the assumption explicit with an assertion.}
739
740% Making Method Calls Simpler
741\explanation{Rename Method}{The name of a method does not reveal its
742purpose.}{Change the name of the method}
743
744\explanation{Add Parameter}{A method needs more information from its
745caller.}{Add a parameter for an object that can pass on this information.}
746
747\explanation{Remove Parameter}{A parameter is no longer used by the method
748body.}{Remove it.}
749
750%\explanation{Parameterize Method}{Several methods do similar things but with
751%different values contained in the method.}{Create one method that uses a
752%parameter for the different values.}
753
754\explanation{Preserve Whole Object}{You are getting several values from an
755object and passing these values as parameters in a method call.}{Send the whole
756object instead.}
757
758\explanation{Remove Setting Method}{A field should be set at creation time and
759never altered.}{Remove any setting method for that field.}
760
761\explanation{Hide Method}{A method is not used by any other class.}{Make the
762method private.}
763
8fae7b44
EK
764\explanation{Replace Constructor with Factory Method}{You want to do more than
765simple construction when you create an object}{Replace the constructor with a
137e0e7b
EK
766factory method.}
767
768% Dealing with Generalization
8fae7b44 769\explanation{Pull Up Field}{Two subclasses have the same field.}{Move the field
137e0e7b
EK
770to the superclass.}
771
772\explanation{Pull Up Method}{You have methods with identical results on
773subclasses.}{Move them to the superclass.}
774
8fae7b44 775\explanation{Push Down Method}{Behavior on a superclass is relevant only for
137e0e7b
EK
776some of its subclasses.}{Move it to those subclasses.}
777
778\explanation{Push Down Field}{A field is used only by some subclasses.}{Move the
779field to those subclasses}
780
781\explanation{Extract Interface}{Several clients use the same subset of a class's
8fae7b44 782interface, or two classes have part of their interfaces in common.}{Extract the
137e0e7b
EK
783subset into an interface.}
784
785\explanation{Replace Inheritance with Delegation}{A subclass uses only part of a
786superclasses interface or does not want to inherit data.}{Create a field for the
787superclass, adjust methods to delegate to the superclass, and remove the
788subclassing.}
789
790\explanation{Replace Delegation with Inheritance}{You're using delegation and
791are often writing many simple delegations for the entire interface}{Make the
792delegating class a subclass of the delegate.}
793
794\subsubsection{Composite refactorings}
795
796% Composing Methods
797% \explanation{Replace Method with Method Object}{}{}
798
799% Moving Features Between Objects
800\explanation{Extract Class}{You have one class doing work that should be done by
801two}{Create a new class and move the relevant fields and methods from the old
802class into the new class.}
803
804\explanation{Inline Class}{A class isn't doing very much.}{Move all its features
805into another class and delete it.}
806
807\explanation{Hide Delegate}{A client is calling a delegate class of an
808object.}{Create Methods on the server to hide the delegate.}
809
810\explanation{Remove Middle Man}{A class is doing to much simple delegation.}{Get
811the client to call the delegate directly.}
812
813% Organizing Data
814\explanation{Replace Data Value with Object}{You have a data item that needs
8fae7b44 815additional data or behavior.}{Turn the data item into an object.}
137e0e7b
EK
816
817\explanation{Change Value to Reference}{You have a class with many equal
818instances that you want to replace with a single object.}{Turn the object into a
819reference object.}
820
821\explanation{Encapsulate Collection}{A method returns a collection}{Make it
8fae7b44 822return a read-only view and provide add/remove methods.}
137e0e7b
EK
823
824% \explanation{Replace Array with Object}{}{}
825
826\explanation{Replace Subclass with Fields}{You have subclasses that vary only in
827methods that return constant data.}{Change the methods to superclass fields and
828eliminate the subclasses.}
829
830% Simplifying Conditional Expressions
831\explanation{Decompose Conditional}{You have a complicated conditional
832(if-then-else) statement.}{Extract methods from the condition, then part, an
833else part.}
834
835\explanation{Consolidate Conditional Expression}{You have a sequence of
836conditional tests with the same result.}{Combine them into a single conditional
837expression and extract it.}
838
839\explanation{Replace Conditional with Polymorphism}{You have a conditional that
8fae7b44 840chooses different behavior depending on the type of an object.}{Move each leg
137e0e7b
EK
841of the conditional to an overriding method in a subclass. Make the original
842method abstract.}
843
844% Making Method Calls Simpler
845\explanation{Replace Parameter with Method}{An object invokes a method, then
846passes the result as a parameter for a method. The receiver can also invoke this
847method.}{Remove the parameter and let the receiver invoke the method.}
848
849\explanation{Introduce Parameter Object}{You have a group of parameters that
850naturally go together.}{Replace them with an object.}
851
852% Dealing with Generalization
853\explanation{Extract Subclass}{A class has features that are used only in some
854instances.}{Create a subclass for that subset of features.}
855
856\explanation{Extract Superclass}{You have two classes with similar
857features.}{Create a superclass and move the common features to the
858superclass.}
859
860\explanation{Collapse Hierarchy}{A superclass and subclass are not very
861different.}{Merge them together.}
862
863\explanation{Form Template Method}{You have two methods in subclasses that
864perform similar steps in the same order, yet the steps are different.}{Get the
865steps into methods with the same signature, so that the original methods become
866the same. Then you can pull them up.}
867
868
869\subsection{Functional refactorings}
870
871\explanation{Substitute Algorithm}{You want to replace an algorithm with one
872that is clearer.}{Replace the body of the method with the new algorithm.}
00aa0588 873
b289552b 874\end{comment}
00aa0588
EK
875
876\section{The impact on software quality}
877
a1bafe90 878\subsection{What is software quality?}
00aa0588 879The term \emph{software quality} has many meanings. It all depends on the
9a55a5bc 880context we put it in. If we look at it with the eyes of a software developer, it
a1bafe90 881usually means that the software is easily maintainable and testable, or in other
9a55a5bc
EK
882words, that it is \emph{well designed}. This often correlates with the
883management scale, where \emph{keeping the schedule} and \emph{customer
137e0e7b
EK
884satisfaction} is at the center. From the customers point of view, in addition to
885good usability, \emph{performance} and \emph{lack of bugs} is always
886appreciated, measurements that are also shared by the software developer. (In
887addition, such things as good documentation could be measured, but this is out
888of the scope of this document.)
9a55a5bc 889
00aa0588 890\subsection{The impact on performance}
9a55a5bc 891\begin{quote}
a1bafe90
EK
892 Refactoring certainly will make software go more slowly\footnote{With todays
893 compiler optimization techniques and performance tuning of e.g. the Java
894virtual machine, the penalties of object creation and method calls are
895debatable.}, but it also makes the software more amenable to performance
896tuning.~\cite[p.~69]{refactoring}
9a55a5bc 897\end{quote}
ee45c41f
EK
898
899\noindent There is a common belief that refactoring compromises performance, due
900to increased degree of indirection and that polymorphism is slower than
9a55a5bc
EK
901conditionals.
902
b5c7bb1b 903In a survey, Demeyer\citing{demeyer2002} disproves this view in the case of
a1bafe90 904polymorphism. He did an experiment on, what he calls, ``Transform Self Type
9a55a5bc
EK
905Checks'' where you introduce a new polymorphic method and a new class hierarchy
906to get rid of a class' type checking of a ``type attribute``. He uses this kind
907of transformation to represent other ways of replacing conditionals with
908polymorphism as well. The experiment is performed on the C++ programming
a1bafe90
EK
909language and with three different compilers and platforms. Demeyer concludes
910that, with compiler optimization turned on, polymorphism beats middle to large
911sized if-statements and does as well as case-statements. (In accordance with
912his hypothesis, due to similarities between the way C++ handles polymorphism and
913case-statements.)
ee45c41f 914
9a55a5bc
EK
915\begin{quote}
916 The interesting thing about performance is that if you analyze most programs,
b5c7bb1b 917 you find that they waste most of their time in a small fraction of the
4cb06723 918 code.~\cite[p.~70]{refactoring}
9a55a5bc 919\end{quote}
9a55a5bc 920
ee45c41f
EK
921\noindent So, although an increased amount of method calls could potentially
922slow down programs, one should avoid premature optimization and sacrificing good
fe0a4c48
EK
923design, leaving the performance tuning until after \gloss{profiling} the
924software and having isolated the actual problem areas.
00aa0588 925
0d7fbd88 926\section{Composite refactorings}\label{compositeRefactorings}
f3a108c3
EK
927\todo{motivation, examples, manual vs automated?, what about refactoring in a
928very large code base?}
6065c96c 929Generally, when thinking about refactoring, at the mechanical level, there are
f65da046 930essentially two kinds of refactorings. There are the \emph{primitive}
a1bafe90 931refactorings, and the \emph{composite} refactorings.
6065c96c 932
6c51af15
EK
933\definition{A \emph{primitive refactoring} is a refactoring that cannot be
934expressed in terms of other refactorings.}
f65da046 935
fe0a4c48 936\noindent Examples are the \refa{Pull Up Field} and \refa{Pull Up
a1bafe90 937Method} refactorings\citing{refactoring}, that move members up in their class
ee45c41f
EK
938hierarchies.
939
6c51af15
EK
940\definition{A \emph{composite refactoring} is a refactoring that can be
941expressed in terms of two or more other refactorings.}
f65da046 942
fe0a4c48 943\noindent An example of a composite refactoring is the \refa{Extract
b5c7bb1b
EK
944Superclass} refactoring\citing{refactoring}. In its simplest form, it is composed
945of the previously described primitive refactorings, in addition to the
fe0a4c48 946\refa{Pull Up Constructor Body} refactoring\citing{refactoring}. It works
b5c7bb1b 947by creating an abstract superclass that the target class(es) inherits from, then
fe0a4c48
EK
948by applying \refa{Pull Up Field}, \refa{Pull Up Method} and
949\refa{Pull Up Constructor Body} on the members that are to be members of
f5fb40e4
EK
950the new superclass. If there are multiple classes in play, their interfaces may
951need to be united with the help of some rename refactorings, before extracting
fe0a4c48 952the superclass. For an overview of the \refa{Extract Superclass}
8b6b22c8 953refactoring, see \myref{fig:extractSuperclass}.
6065c96c 954
ddcea0b5
EK
955\begin{figure}[h]
956 \centering
faa9f4f3 957 \includegraphics[angle=270,width=\linewidth]{extractSuperclassItalic.pdf}
f5fb40e4 958 \caption{The Extract Superclass refactoring, with united interfaces.}
ddcea0b5
EK
959 \label{fig:extractSuperclass}
960\end{figure}
6065c96c
EK
961
962\section{Manual vs. automated refactorings}
0d7fbd88 963Refactoring is something every programmer does, even if \heshe does not known
f65da046
EK
964the term \emph{refactoring}. Every refinement of source code that does not alter
965the program's behavior is a refactoring. For small refactorings, such as
0d7fbd88 966\ExtractMethod, executing it manually is a manageable task, but is still prone
a1bafe90 967to errors. Getting it right the first time is not easy, considering the method
f65da046
EK
968signature and all the other aspects of the refactoring that has to be in place.
969
f5fb40e4
EK
970Consider the renaming of classes, methods and fields. For complex programs these
971refactorings are almost impossible to get right. Attacking them with textual
972search and replace, or even regular expressions, will fall short on these tasks.
973Then it is crucial to have proper tool support that can perform them
974automatically. Tools that can parse source code and thus have semantic knowledge
975about which occurrences of which names belong to what construct in the program.
976For even trying to perform one of these complex task manually, one would have to
977be very confident on the existing test suite \see{testing}.
00aa0588 978
19c4f27d 979\section{Correctness of refactorings}\label{correctness}
f65da046 980For automated refactorings to be truly useful, they must show a high degree of
f5fb40e4
EK
981behavior preservation. This last sentence might seem obvious, but there are
982examples of refactorings in existing tools that break programs. In an ideal
983world, every automated refactoring would be ``complete'', in the sense that it
984would never break a program. In an ideal world, every program would also be free
985from bugs. In modern IDEs the implemented automated refactorings are working for
986\emph{most} cases, that is enough for making them useful.
987
988I will now present an example of a \emph{corner case} where a program breaks
989when a refactoring is applied. The example shows an \ExtractMethod refactoring
990followed by a \MoveMethod refactoring that breaks a program in both the
fe0a4c48 991\name{Eclipse} and \name{IntelliJ} IDEs\footnote{The \name{NetBeans} IDE handles this
3ab3e132 992 particular situation without altering the program's behavior, mainly because
fe0a4c48 993 its \refa{Move Method} refactoring implementation is a bit flawed in other ways
f5fb40e4
EK
994 \see{toolSupport}.}. The target and the destination for the composed
995 refactoring is shown in \myref{lst:correctnessExtractAndMove}. Note that the
996 method \method{m(C c)} of class \type{X} assigns to the field \var{x} of the
997 argument \var{c} that has type \type{C}.
998
999\begin{listing}[h]
1000\begin{multicols}{2}
1001\begin{minted}[linenos]{java}
1002// Refactoring target
ddcea0b5 1003public class C {
f5fb40e4 1004 public X x = new X();
ee45c41f 1005
f5fb40e4
EK
1006 public void f() {
1007 x.m(this);
1008 // Not the same x
1009 x.n();
1010 }
ddcea0b5
EK
1011}
1012\end{minted}
ee45c41f 1013
f5fb40e4 1014\columnbreak
ee45c41f 1015
f5fb40e4
EK
1016\begin{minted}[]{java}
1017// Method destination
ee45c41f 1018public class X {
f5fb40e4
EK
1019 public void m(C c) {
1020 c.x = new X();
1021 // If m is called from
1022 // c, then c.x no longer
1023 // equals 'this'
1024 }
1025 public void n() {}
ee45c41f
EK
1026}
1027\end{minted}
f5fb40e4
EK
1028\end{multicols}
1029\caption{The target and the destination for the composition of the Extract
fe0a4c48 1030Method and \refa{Move Method} refactorings.}
f5fb40e4
EK
1031\label{lst:correctnessExtractAndMove}
1032\end{listing}
ee45c41f 1033
f5fb40e4
EK
1034
1035The refactoring sequence works by extracting line 6 through 8 from the original
3510e539 1036class \type{C} into a method \method{f} with the statements from those lines as
f5fb40e4
EK
1037its method body (but with the comment left out, since it will no longer hold any
1038meaning). The method is then moved to the class \type{X}. The result is shown
1039in \myref{lst:correctnessExtractAndMoveResult}.
ee45c41f 1040
f5fb40e4
EK
1041Before the refactoring, the methods \method{m} and \method{n} of class \type{X}
1042are called on different object instances (see line 6 and 8 of the original class
1043\type{C} in \cref{lst:correctnessExtractAndMove}). After the refactoring, they
1044are called on the same object, and the statement on line
10453 of class \type{X} (in \cref{lst:correctnessExtractAndMoveResult}) no longer
1046 has the desired effect in our example. The method \method{f} of class \type{C}
1047 is now calling the method \method{f} of class \type{X} (see line 5 of class
1048 \type{C} in \cref{lst:correctnessExtractAndMoveResult}), and the program now
1049 behaves different than before.
1050
1051\begin{listing}[h]
1052\begin{multicols}{2}
1053\begin{minted}[linenos]{java}
ee45c41f
EK
1054public class C {
1055 public X x = new X();
1056
1057 public void f() {
1058 x.f(this);
1059 }
1060}
1061\end{minted}
1062
f5fb40e4
EK
1063\columnbreak
1064
1065\begin{minted}[linenos]{java}
ee45c41f
EK
1066public class X {
1067 public void m(C c) {
1068 c.x = new X();
1069 }
1070 public void n() {}
f5fb40e4
EK
1071 // Extracted and
1072 // moved method
ee45c41f
EK
1073 public void f(C c) {
1074 m(c);
1075 n();
1076 }
1077}
1078\end{minted}
f5fb40e4
EK
1079\end{multicols}
1080\caption{The result of the composed refactoring.}
1081\label{lst:correctnessExtractAndMoveResult}
1082\end{listing}
ddcea0b5 1083
aa1e3779
EK
1084The bug introduced in the previous example is of such a nature\footnote{Caused
1085 by aliasing. See \url{https://en.wikipedia.org/wiki/Aliasing_(computing)}}
1086 that it is very difficult to spot if the refactored code is not covered by
1087 tests. It does not generate compilation errors, and will thus only result in
1088 a runtime error or corrupted data, which might be hard to detect.
19c4f27d 1089
29f39f29 1090\section{Refactoring and the importance of testing}\label{testing}
19c4f27d
EK
1091\begin{quote}
1092 If you want to refactor, the essential precondition is having solid
1093 tests.\citing{refactoring}
1094\end{quote}
1095
29f39f29
EK
1096When refactoring, there are roughly three classes of errors that can be made.
1097The first class of errors are the ones that make the code unable to compile.
1098These \emph{compile-time} errors are of the nicer kind. They flash up at the
1099moment they are made (at least when using an IDE), and are usually easy to fix.
1100The second class are the \emph{runtime} errors. Although they take a bit longer
1101to surface, they usually manifest after some time in an illegal argument
1102exception, null pointer exception or similar during the program execution.
1103These kind of errors are a bit harder to handle, but at least they will show,
1104eventually. Then there are the \emph{behavior-changing} errors. These errors are
1105of the worst kind. They do not show up during compilation and they do not turn
1106on a blinking red light during runtime either. The program can seem to work
1107perfectly fine with them in play, but the business logic can be damaged in ways
1108that will only show up over time.
1109
1110For discovering runtime errors and behavior changes when refactoring, it is
1111essential to have good test coverage. Testing in this context means writing
1112automated tests. Manual testing may have its uses, but when refactoring, it is
1113automated unit testing that dominate. For discovering behavior changes it is
1114especially important to have tests that cover potential problems, since these
1115kind of errors does not reveal themselves.
1116
1117Unit testing is not a way to \emph{prove} that a program is correct, but it is a
3ab3e132 1118way to make you confident that it \emph{probably} works as desired. In the
4928aa0b 1119context of test-driven development (commonly known as TDD), the tests are even a
29f39f29
EK
1120way to define how the program is \emph{supposed} to work. It is then, by
1121definition, working if the tests are passing.
19c4f27d
EK
1122
1123If the test coverage for a code base is perfect, then it should, theoretically,
29f39f29
EK
1124be risk-free to perform refactorings on it. This is why automated tests and
1125refactoring are such a great match.
f65da046 1126
b5d53f51 1127\subsection{Testing the code from correctness section}
29f39f29
EK
1128The worst thing that can happen when refactoring is to introduce changes to the
1129behavior of a program, as in the example on \myref{correctness}. This example
1130may be trivial, but the essence is clear. The only problem with the example is
1131that it is not clear how to create automated tests for it, without changing it
1132in intrusive ways.
1133
20bcc7bf 1134Unit tests, as they are known from the different \glosspl{xUnit} around, are
29f39f29
EK
1135only suitable to test the \emph{result} of isolated operations. They can not
1136easily (if at all) observe the \emph{history} of a program.
b5d53f51 1137
a13e5650 1138This problem is still open.
116805bf 1139
a13e5650 1140\begin{comment}
116805bf
EK
1141
1142Assuming a sequential (non-concurrent) program:
1143
1144\begin{minted}{java}
1145tracematch (C c, X x) {
1146 sym m before:
1147 call(* X.m(C)) && args(c) && cflow(within(C));
1148 sym n before:
1149 call(* X.n()) && target(x) && cflow(within(C));
1150 sym setCx after:
1151 set(C.x) && target(c) && !cflow(m);
1152
1153 m n
1154
1155 { assert x == c.x; }
1156}
1157\end{minted}
1158
1159%\begin{minted}{java}
1160%tracematch (X x1, X x2) {
1161% sym m before:
1162% call(* X.m(C)) && target(x1);
1163% sym n before:
1164% call(* X.n()) && target(x2);
1165% sym setX after:
1166% set(C.x) && !cflow(m) && !cflow(n);
1167%
1168% m n
1169%
1170% { assert x1 != x2; }
1171%}
1172%\end{minted}
a13e5650 1173\end{comment}
116805bf 1174
222d172b
EK
1175
1176\chapter{The Project}
1177
0e6e57d3 1178\section{Project description}
60065669 1179The aim of this master's project will be to explore the relationship between the
0fe1a611
EK
1180\ExtractMethod and the \MoveMethod refactorings. This will be done by composing
1181the two into a composite refactoring. The refactoring will be called the
6a779665
EK
1182\ExtractAndMoveMethod refactoring.
1183
1184The two primitive \ExtractMethod and \MoveMethod refactorings must already be
1185implemented in a tool, so the \ExtractAndMoveMethod refactoring is going to be
1186built on top of those.
b5d53f51 1187
a13e5650
EK
1188The composition of the \ExtractMethod and \MoveMethod refactorings springs
1189naturally out of the need to move procedures closer to the data they manipulate.
1190This composed refactoring is not well described in the literature, but it is
1191implemented in at least one tool called
fe0a4c48
EK
1192\name{CodeRush}\footnote{\url{https://help.devexpress.com/\#CodeRush/CustomDocument3519}},
1193that is an extension for \name{MS Visual
b5d53f51 1194Studio}\footnote{\url{http://www.visualstudio.com/}}. In CodeRush it is called
fe0a4c48 1195\refa{Extract Method to
b5d53f51 1196Type}\footnote{\url{https://help.devexpress.com/\#CodeRush/CustomDocument6710}},
0e6e57d3 1197but I choose to call it \ExtractAndMoveMethod, since I feel this better
b5d53f51
EK
1198communicates which primitive refactorings it is composed of.
1199
0fe1a611 1200The project will consist of implementing the \ExtractAndMoveMethod refactoring,
0e6e57d3 1201as well as executing it over a larger code base, as a case study. To be able to
0fe1a611 1202execute the refactoring automatically, I have to make it analyze code to
0e6e57d3 1203determine the best selections to extract into new methods.
70905ddc 1204
021508ad
EK
1205\section{The primitive refactorings}
1206The refactorings presented here are the primitive refactorings used in this
1207project. They are the abstract building blocks used by the \ExtractAndMoveMethod
1208refactoring.
04e21f15
EK
1209
1210\subsection{The Extract Method refactoring}
1211The \refa{Extract Method} refactoring is used to extract a fragment of code
1212from its context and into a new method. A call to the new method is inlined
d516ac0b
EK
1213where the fragment was before. It is used to break code into logical units, with
1214names that explain their purpose.
1215
1216An example of an \ExtractMethod refactoring is shown in
1217\myref{lst:extractMethodRefactoring}. It shows a method containing calls to the
1218methods \method{foo} and \method{bar} of a type \type{X}. These statements are
1219then extracted into the new method \method{fooBar}.
d516ac0b 1220
021508ad 1221\begin{listing}[h]
d516ac0b
EK
1222 \begin{multicols}{2}
1223 \begin{minted}[samepage]{java}
1224 // Before
1225 class C {
1226 void method() {
1227 X x = new X();
1228 x.foo(); x.bar();
1229 }
1230 }
1231 \end{minted}
1232
1233 \columnbreak
1234
1235 \begin{minted}[samepage]{java}
1236 // After
1237 class C {
1238 void method() {
1239 X x = new X();
1240 fooBar(x);
1241 }
d516ac0b
EK
1242 void fooBar(X x) {
1243 x.foo(); x.bar();
1244 }
1245 }
1246 \end{minted}
1247 \end{multicols}
1248 \caption{An example of an \ExtractMethod refactoring.}
1249 \label{lst:extractMethodRefactoring}
1250\end{listing}
04e21f15
EK
1251
1252\subsection{The Move Method refactoring}
1253The \refa{Move Method} refactoring is used to move a method from one class to
4306ef44
EK
1254another. This can be appropriate if the method is using more features of another
1255class than of the class which it is currently defined.
1256
1257\Myref{lst:moveMethodRefactoring} shows an example of this refactoring. Here a
1258method \method{fooBar} is moved from the class \type{C} to the class \type{X}.
1259
021508ad 1260\begin{listing}[h]
4306ef44
EK
1261 \begin{multicols}{2}
1262 \begin{minted}[samepage]{java}
1263 // Before
1264 class C {
1265 void method() {
1266 X x = new X();
1267 fooBar(x);
1268 }
1269 void fooBar(X x) {
1270 x.foo(); x.bar();
1271 }
1272 }
1273
1274 class X {
1275 void foo(){/*...*/}
1276 void bar(){/*...*/}
1277 }
1278 \end{minted}
1279
1280 \columnbreak
1281
1282 \begin{minted}[samepage]{java}
1283 // After
1284 class C {
1285 void method() {
1286 X x = new X();
1287 x.fooBar();
1288 }
1289 }
1290
1291 class X {
1292 void fooBar() {
1293 foo(); bar();
1294 }
1295 void foo(){/*...*/}
1296 void bar(){/*...*/}
1297 }
1298 \end{minted}
1299 \end{multicols}
1300 \caption{An example of a \MoveMethod refactoring.}
1301 \label{lst:moveMethodRefactoring}
1302\end{listing}
04e21f15 1303
021508ad
EK
1304\section{The Extract and Move Method refactoring}
1305The \ExtractAndMoveMethod refactoring is a composite refactoring composed of the
1306primitive \ExtractMethod and \MoveMethod refactorings. The effect of this
1307refactoring on source code is the same as when extracting a method and moving it
1308to another class. Conseptually, this is done without an intermediate step. In
1309practice, as we shall see later, an intermediate step may be necessary.
1310
1311An example of this composite refactoring is shown in
1312\myref{lst:extractAndMoveMethodRefactoring}. The example joins the examples from
1313\cref{lst:extractMethodRefactoring} and \cref{lst:moveMethodRefactoring}. This
1314means that the selection consisting of the consecutive calls to the methods
1315\method{foo} and \method{bar}, is extracted into a new method \method{fooBar}
1316located in the class \type{X}.
1317
1318\begin{listing}[h]
1319 \begin{multicols}{2}
1320 \begin{minted}[samepage]{java}
1321 // Before
1322 class C {
1323 void method() {
1324 X x = new X();
1325 x.foo(); x.bar();
1326 }
1327 }
1328
1329 class X {
1330 void foo(){/*...*/}
1331 void bar(){/*...*/}
1332 }
1333 \end{minted}
1334
1335 \columnbreak
1336
1337 \begin{minted}[samepage]{java}
1338 // After
1339 class C {
1340 void method() {
1341 X x = new X();
1342 x.fooBar();
1343 }
1344 }
04e21f15 1345
021508ad
EK
1346 class X {
1347 void fooBar() {
1348 foo(); bar();
1349 }
1350 void foo(){/*...*/}
1351 void bar(){/*...*/}
1352 }
1353 \end{minted}
1354 \end{multicols}
1355 \caption{An example of the \ExtractAndMoveMethod refactoring.}
1356 \label{lst:extractAndMoveMethodRefactoring}
1357\end{listing}
04e21f15 1358
0fe1a611
EK
1359\section{Research questions}
1360The main question that I seek an answer to in this thesis is:
1361
1362\begin{quote}
1363 Is it possible to automate the analysis and execution of the
1364 \ExtractAndMoveMethod refactoring, and do so for all of the code of a larger
1365 project?
1366\end{quote}
1367
1368\noindent The secondary questions will then be:
1369
1370\paragraph{Can we do this efficiently?} Can we automate the analysis and
1371execution of the refactoring so it can be run in a reasonable amount of time?
1372And what does \emph{reasonable} mean in this context?
1373
1374And, assuming the refactoring does in fact improve the quality of source code:
1375
1376\paragraph{How can the automation of the refactoring be helpful?} What is the
1377usefullness of the refactoring in a software development setting? In what parts
1378of the development process can the refactoring play a role?
1379
3f929fcc 1380\section{Choosing the target language}
70905ddc
EK
1381Choosing which programming language the code that shall be manipulated shall be
1382written in, is not a very difficult task. We choose to limit the possible
1383languages to the object-oriented programming languages, since most of the
1384terminology and literature regarding refactoring comes from the world of
1385object-oriented programming. In addition, the language must have existing tool
1386support for refactoring.
1387
fe0a4c48 1388The \name{Java} programming language\footnote{\url{https://www.java.com/}} is
70905ddc
EK
1389the dominating language when it comes to example code in the literature of
1390refactoring, and is thus a natural choice. Java is perhaps, currently the most
fe0a4c48 1391influential programming language in the world, with its \name{Java Virtual
70905ddc
EK
1392Machine} that runs on all of the most popular architectures and also supports
1393dozens of other programming languages\footnote{They compile to java bytecode.},
fe0a4c48 1394with \name{Scala}, \name{Clojure} and \name{Groovy} as the most prominent ones.
70905ddc
EK
1395Java is currently the language that every other programming language is compared
1396against. It is also the primary programming language for the author of this
1397thesis.
3f929fcc
EK
1398
1399\section{Choosing the tools}
3ab3e132 1400When choosing a tool for manipulating Java, there are certain criteria that
3f929fcc
EK
1401have to be met. First of all, the tool should have some existing refactoring
1402support that this thesis can build upon. Secondly it should provide some kind of
1403framework for parsing and analyzing Java source code. Third, it should itself be
1404open source. This is both because of the need to be able to browse the code for
1405the existing refactorings that is contained in the tool, and also because open
1406source projects hold value in them selves. Another important aspect to consider
1407is that open source projects of a certain size, usually has large communities of
3ab3e132
EK
1408people connected to them, that are committed to answering questions regarding the
1409use and misuse of the products, that to a large degree is made by the community
3f929fcc
EK
1410itself.
1411
3ab3e132 1412There is a certain class of tools that meet these criteria, namely the class of
3f929fcc 1413\emph{IDEs}\footnote{\emph{Integrated Development Environment}}. These are
3ab3e132 1414programs that is meant to support the whole production cycle of a computer
3f929fcc
EK
1415program, and the most popular IDEs that support Java, generally have quite good
1416refactoring support.
1417
fe0a4c48
EK
1418The main contenders for this thesis is the \name{Eclipse IDE}, with the
1419\name{Java development tools} (JDT), the \name{IntelliJ IDEA Community Edition}
1420and the \name{NetBeans IDE} \see{toolSupport}. \name{Eclipse} and
1421\name{NetBeans} are both free, open source and community driven, while the
1422\name{IntelliJ IDEA} has an open sourced community edition that is free of
1423charge, but also offer an \name{Ultimate Edition} with an extended set of
1424features, at additional cost. All three IDEs supports adding plugins to extend
1425their functionality and tools that can be used to parse and analyze Java source
1426code. But one of the IDEs stand out as a favorite, and that is the \name{Eclipse
1427IDE}. This is the most popular\citing{javaReport2011} among them and seems to be
1428de facto standard IDE for Java development regardless of platform.
4e135659 1429
a5317dcf 1430
c894b297 1431\chapter{Semantics}
46f52f52 1432\todoin{Rename chapter?}
c894b297
EK
1433
1434\section{The inputs to the refactoring}
1435For executing an \ExtractAndMoveMethod refactoring, there are two simple
1436requirements. The first thing the refactoring need is a text selection, telling
1437it what to extract. Its second requirement is a target for the subsequent move
1438operation.
1439
1440The extracted method must be called instead of the selection that makes up its
1441body. Also, the method call has to be performed via a variable, since the method
1442is not static. \todo{Explain why static methods are not considered} Therefore,
1443the move target must be a variable in the scope of the extracted selection. The
1444actual new location for the extracted method will be the class representing the
1445type of the move target variable. But, since the method also must be called
1446through a variable, it makes sense to define the move target to be either a
1447local variable or a field in the scope of the text selection.
1448
1449\section{Finding a move target}
1450In the analysis needed to perform the \ExtractAndMoveMethod refactoring
1451automatically, the selection we choose is found among all the selections that
1452has a possible move target. Therefore, the best possible move target must be
1453found for all the candidate selections, so that we are able to sort out the
1454selection that is best suited for the refactoring.
1455
1456To find the best move target for a specific text selection, we first need to
1457find all the possible targets. Since the target must be a local variable or a
1458field, we are basically looking for names within the selection; names that
1459represents references to variables.
1460
1461The names we are looking for, we call prefixes. This is because we are not
1462interested in names that occur in the middle of a dot-separated sequence of
1463names. We are only interested in names that constitutes prefixes of other names,
1464possibly themselves. The reason for this, is that two lexically equal names need
1465not be referencing the same variable, if they themselves are not referenced via
1466the same prefix. Consider the two method calls \code{a.x.foo()} and
1467\code{b.x.foo()}. Here, the two references to \code{x}, in the middle of the
1468qualified names both preceding \code{foo()}, are not referencing the same
1469variable. Even though the variables may share the type, and the method
1470\method{foo} thus is the same for both, we would not know through which of the
1471variables \var{a} or \var{b} we should call the extracted method.
1472
1473The possible move targets are then the prefixes that are not among a subset of
1474the prefixes that are not valid move targets \see{s:unfixes}. Also, prefixes
1475that are just simple names, and have only one occurrence, are left out. This is
1476because they are not going to have any positive effect on coupling between
1477classes.
1478
1479For finding the best move target among these safe prefixes, a simple heuristic
1480is used. It is as simple as choosing the prefix that is most frequently
1481referenced within the selection.
1482
1483\section{Unfixes}\label{s:unfixes}
1484The prefixes that are not valid as move targets are called unfixes.
1485
47c0bea8
EK
1486An unfix can be a name that is assigned to within a selection. The reason that
1487this cannot be allowed, is that the result would be an assignment to the
1488\type{this} keyword, which is not valid in Java \see{eclipse_bug_420726}.
1489
1490Prefixes that originates from variable declarations within the same selection
1491are also considered unfixes. This is because when a method is moved, it needs to
1492be called through a variable. If this variable is also declared within the
1493method that is to be moved, this obviously cannot be done.
1494
1495Also considered as unfixes are variable references that are of types that are
1496not suitable for moving methods to. This can either be because it is not
1497physically possible to move a method to the desired class or that it will cause
1498compilation errors by doing so.
1499
1500If the type binding for a name is not resolved it is considered and unfix. The
1501same applies to types that is only found in compiled code, so they have no
1502underlying source that is accessible to us. (E.g. the \type{java.lang.String}
1503class.)
1504
1505Interfaces types are not suitable as targets. This is simply because interfaces
1506in Java cannot contain methods with bodies. (This thesis does not deal with
1507features of Java versions later than Java 7. Java 8 has interfaces with default
1508implementations of methods.)
1509
1510Neither are local types allowed. This accounts for both local and anonymous
1511classes. Anonymous classes are effectively the same as interface types with
1512respect to unfixes. Local classes could in theory be used as targets, but this
1513is not possible due to limitations of the way the \refa{Extract and Move Method}
1514refactoring has to be implemented. The problem is that the refactoring is done
1515in two steps, so the intermediate state between the two refactorings would not
1516be legal Java code. In the intermediate step for the case where a local class is
1517the move target, the extracted method would need to take the local class as a
1518parameter. This new method would need to live in the scope of the declaring
1519class of the originating method. The local class would then not be in the scope
1520of the extracted method, thus bringing the source code into an illegal state.
1521One could imagine that the method was extracted and moved in one operation,
1522without an intermediate state. Then it would make sense to include variables
1523with types of local classes in the set of legal targets, since the local classes
1524would then be in the scopes of the method calls. If this makes any difference
1525for software metrics that measure coupling would be a different discussion.
1526
1527\begin{listing}
1528\begin{multicols}{2}
1529\begin{minted}[]{java}
1530// Before
1531void declaresLocalClass() {
1532 class LocalClass {
1533 void foo() {}
1534 void bar() {}
1535 }
1536
1537 LocalClass inst =
1538 new LocalClass();
1539 inst.foo();
1540 inst.bar();
1541}
1542\end{minted}
1543
1544\columnbreak
1545
1546\begin{minted}[]{java}
1547// After Extract Method
1548void declaresLocalClass() {
1549 class LocalClass {
1550 void foo() {}
1551 void bar() {}
1552 }
1553
1554 LocalClass inst =
1555 new LocalClass();
1556 fooBar(inst);
1557}
1558
1559// Intermediate step
1560void fooBar(LocalClass inst) {
1561 inst.foo();
1562 inst.bar();
1563}
1564\end{minted}
1565\end{multicols}
1566\caption{When the \refa{Extract and Move Method} tries to use a variable with a
1567local type as the move target, an intermediate step is performed that is not
1568allowed. Here: \type{LocalClass} is not in the scope of \method{fooBar} in its
1569intermediate location.}
1570\label{lst:extractMethod_LocalClass}
1571\end{listing}
1572
1573The last class of names that are considered unfixes are names used in null
1574tests. These are tests that reads like this: if \texttt{<name>} equals
1575\var{null} then do something. If allowing variables used in those kinds of
1576expressions as targets for moving methods, we would end up with code containing
1577boolean expressions like \texttt{this == null}, which would not be meaningful,
1578since \var{this} would never be \var{null}.
1579
0cc6a67d
EK
1580\todoin{Describe what a text selection is?}
1581
46f52f52
EK
1582\section{Choosing the selection}
1583When choosing a selection between the text selections that have possible move
1584targets, the selections need to be ordered. The criteria below are presented in
1585the order they are prioritized. If not one selection is favored over the other
1586for a concrete criterion, the selections are evaluated by the next criterion.
1587
8de7cf3c
EK
1588\begin{enumerate}
1589 \item The first criterion that is evaluated is whether a selection contains
1c521a77
EK
1590 any unfixes or not. If selection \m{A} contains no unfixes, while
1591 selection \m{B} does, selection \m{A} is favored over selection
1592 \m{B}. This is done under the assumption that, if possible, avoiding
8de7cf3c
EK
1593 selections containing unfixes will make the code moved a little
1594 cleaner.\todoin{more arguments?}
1595
1596 \item The second criterion that is evaluated is how many possible targets a
1c521a77
EK
1597 selection contains. If selection \m{A} has only one possible target, and
1598 selection \m{B} has multiple, selection \m{A} is favored. If both
8de7cf3c
EK
1599 selections have multiple possible targets, they are considered equal with
1600 respect to this criterion. The rationale for this heuristic is that we would
1601 prefer not to introduce new couplings between classes when performing the
1602 \ExtractAndMoveMethod refactoring.
1603
1604 \item When evaluating the last criterion, this is with the knowledge that
1c521a77
EK
1605 selection \m{A} and \m{B} both have one possible target. Then, if
1606 the move target candidate of selection \m{A} has a higher reference count
1607 than the target candidate of selection \m{B}, selection \m{A} is
8de7cf3c
EK
1608 favored. The reason for this is that we would like to move the selection that
1609 gets rid of the most references to another class.
1610
1611\end{enumerate}
1612
1613If none of the above mentioned criteria favors one selection over another, the
1614selections are considered to be equally good candidates for the
1615\ExtractAndMoveMethod refactoring.
c894b297 1616
0cc6a67d 1617\section{Disqualifying a selection}
1c521a77
EK
1618Certain text selections would lead to broken code if used as input to the
1619\ExtractAndMoveMethod refactoring. To avoid this, we have to check all text
15327961
EK
1620selections for such conditions before they are further analyzed. This sections
1621is therefore going to present some properties that makes a selection unsuitable
1622for our refactoring.
1c521a77
EK
1623
1624\subsection{A call to a protected or package-private method}
1625If a text selection contains such a call, it would not be safe to move it to
1626another class. The reason for this, is that we cannot know if the called method
1627is being overridden by some subclass of the \gloss{enclosingClass}, or not.
1628
1629Imagine that the protected method \method{foo} is declared in class \m{A},
1630and overridden in class \m{B}. The method \method{foo} is called from within a
1631selection done to a method in \m{A}. We want to extract and move this selection
1632to another class. The method \method{foo} is not public, so the \MoveMethod
1633refactoring must make it public, making the extracted method able to call it
1634from the extracted method's new location. The problem is that the, now public,
1635method \method{foo} is overridden in a subclass, where it has a protected
1636status. This makes the compiler complain that the subclass \m{B} is trying to
1637reduce the visibility of a method declared in its superclass \m{A}. This is not
1638allowed in Java, and for good reasons. It would make it possible to make a
1639subclass that could not be a substitute for its superclass.
1640\todoin{Code example?}
1641
1642\subsection{Instantiation of non-static inner class}
1643When a non-static inner class is instantiated, this must happen in the scope of
1644its declaring class. This is because it must have access to the members of the
1645declaring class. If the inner class is public, it is possible to instantiate it
1646through an instance of its declaring class, but this is not handled by the
1647underlying \MoveMethod refactoring.
1648
1649Performing a move on a method that instantiates a non-static inner class, will
1650break the code if the instantiation is not handled properly. For this reason,
1651selections that contains instantiations of non-static inner classes are deemed
1652unsuitable for the \ExtractAndMoveMethod refactoring.
1653
1654\subsection{References to enclosing instances of the enclosing class}
1655The title of this section may be a little hard to grasp at first. What it means
1656is that there is a (non-static) class \m{C} that is declared in the scope of
1657possibly multiple other classes. And there is a statement in a method declared
1658in class \m{C} that contains a reference to one or more instances of these
1659enclosing classes of \m{C}.
1660
1661The problem with this, is that these references may not be valid if they are
1662moved to another class. Theoretically, some situations could easily be solved by
1663passing, to the moved method, a reference to the instance where the problematic
1664referenced member is declared, in the case where this member is public. This is
1665not done in the underlying \MoveMethod refactoring, so it cannot be allowed in
1666the \ExtractAndMoveMethod refactoring either.
1667
15327961 1668\subsection{Inconsistent return statements}
cb903a13
EK
1669To verify that a text selection is consistent with respect to return statements,
1670we must check that if a selection contains a return statement, then every
1671possible execution path within the selection ends in either a return or a throw
15327961
EK
1672statement. This property is important regarding the \ExtractMethod refactoring.
1673If it holds, it means that a method could be extracted from the selection, and a
1674call to it could be substituted for the selection. If the method has a non-void
1675return type, then a call to it would also be a valid return point for the
1676calling method. If its return value is of the void type, then the \ExtractMethod
1677refactoring will append an empty return statement to the back of the method
1678call. Therefore, the analysis does not discriminate on either kinds of return
1679statements, with or without a return value.
1680
cb903a13
EK
1681A throw statement is accepted anywhere a return statement is required. This is
1682because a throw statement causes an immediate exit from the current block,
1683together with all outer blocks in its control flow that does not catch the
1684thrown exception.
15327961
EK
1685
1686Return statements can be either explicit or implicit. An \emph{explicit} return
1687statement is formed by using the \code{return} keyword, while an \emph{implicit}
cb903a13 1688return statement is a statement that is not formed using \code{return} keyword,
15327961
EK
1689but must be the last statement of a method that can have any side effects. This
1690can happen in methods with a void return type. An example is a statement that is
1691inside one or more blocks. The last statement of a method could for instance be
cb903a13
EK
1692a synchronized statement, but the last statement that is executed in the method,
1693and that can have any side effects, may be located inside the body of the
1694synchronized statement.
15327961
EK
1695
1696We can start the check for this property by looking at the last statement of a
1697selection to see if it is a return statement (explicit or implicit) or a
1698throw statement. If it is, then the property holds, assuming the selected code
cb903a13 1699does not contain any compilation errors.
15327961
EK
1700\todoin{State somewhere that we assume no compilation errors?}
1701
1702If the last statement of the selection is not a return or throw, the execution
1703of it must end in one. This means that all branches of the last statement of
1704every branch must end in a return or throw. Given this recursive definition,
1705there are only five types of statements that are guaranteed to end in a return
cb903a13 1706or throw if their child branches does. All other statements would have to be
15327961
EK
1707considered illegal. The first three, block-statements, labeled statements and
1708do-statements are all kinds of fall-through statements that always gets their
1709body executed. Do-statements would not make much sense if written such that they
1710always ends after the first round of execution of their body, but that is not
1711our concern. The remaining two statements that can end in a return or throw are
1712if-statements and try-statements.
1713
cb903a13
EK
1714For an if-statement, the rule is that if its then-part does not contain any
1715return or throw statements, it is considered illegal. If the then-part does
1716contain a return or throw, the else-part is checked. If its else-part is
1717non-existent, or it does not contain any return or throw statements, the
1718statement is considered illegal. If an if-statement is not considered illegal,
1719the bodies of its parts must be checked.
15327961 1720
cb903a13
EK
1721Try-statements are handled much the same way as if-statements. The body of a
1722try-statement must contain a return or throw. The same applies to its catch
1723clauses and finally body.
1c521a77 1724
11d9e6e1
EK
1725\subsection{Ambiguous return values}
1726The problem with ambiguous return values arise when a selection is chosen to be
1727extracted into a new method, but it needs to return more than one value from
1728that method.
1729
1730This problem occurs in two situations. The first situation arise when there is
1731more than one local variable that is both assigned to within a selection and
1732also referenced after the selection. The other situation occur when there is
1733only one such assignment, but the selection also contain return statements.
1734
1735Therefore we must examine the selection for assignments to local variables that
1736are referenced after the text selection. Then we must verify that not more than
1737one such reference is done, or zero if any return statements are found.
1738
83f12332
EK
1739\subsection{Illegal statements}
1740An illegal statement may be a statement that is of a type that is never allowed,
1741or it may be a statement that is only allowed if certain conditions are true.
1742
1743Any use of the \var{super} keyword is prohibited, since its meaning is altered
1744when moving a method to another class.
1745
1746For a \emph{break} statement, there are two situations to consider: A break
1747statement with or without a label. If the break statement has a label, it is
1748checked that whole of the labeled statement is inside the selection. If the
1749break statement does not have a label attached to it, it is checked that its
1750innermost enclosing loop or switch statement also is inside the selection.
1751
1752The situation for a \emph{continue} statement is the same as for a break
1753statement, except that it is not allowed inside switch statements.
1754
1755Regarding \emph{assignments}, two types of assignments are allowed: Assignments
1756to non-final variables and assignments to array access. All other assignments
1757are regarded illegal.
1758
1759\todoin{Expand with more illegal statements and/or conclude that I did not have
1760time to analyze all statements types.}
1761
1762
0cc6a67d 1763
5837a41f
EK
1764\chapter{Refactorings in Eclipse JDT: Design, Shortcomings and Wishful
1765Thinking}\label{ch:jdt_refactorings}
1766
1767This chapter will deal with some of the design behind refactoring support in
fe0a4c48 1768\name{Eclipse}, and the JDT in specific. After which it will follow a section about
5837a41f
EK
1769shortcomings of the refactoring API in terms of composition of refactorings. The
1770chapter will be concluded with a section telling some of the ways the
1771implementation of refactorings in the JDT could have worked to facilitate
1772composition of refactorings.
055dca93 1773
b0e80574 1774\section{Design}
fe0a4c48 1775The refactoring world of \name{Eclipse} can in general be separated into two parts: The
b289552b 1776language independent part and the part written for a specific programming
07e173d4
EK
1777language -- the language that is the target of the supported refactorings.
1778\todo{What about the language specific part?}
f041551b
EK
1779
1780\subsection{The Language Toolkit}
1326eec6
EK
1781The Language Toolkit\footnote{The content of this section is a mixture of
1782 written material from
1783 \url{https://www.eclipse.org/articles/Article-LTK/ltk.html} and
1784 \url{http://www.eclipse.org/articles/article.php?file=Article-Unleashing-the-Power-of-Refactoring/index.html},
1785the LTK source code and my own memory.}, or LTK for short, is the framework that
fe0a4c48 1786is used to implement refactorings in \name{Eclipse}. It is language independent and
1326eec6
EK
1787provides the abstractions of a refactoring and the change it generates, in the
1788form of the classes \typewithref{org.eclipse.ltk.core.refactoring}{Refactoring}
1789and \typewithref{org.eclipse.ltk.core.refactoring}{Change}.
1790
1791There are also parts of the LTK that is concerned with user interaction, but
1792they will not be discussed here, since they are of little value to us and our
1793use of the framework. We are primarily interested in the parts that can be
1794automated.
f041551b
EK
1795
1796\subsubsection{The Refactoring Class}
1797The abstract class \type{Refactoring} is the core of the LTK framework. Every
1798refactoring that is going to be supported by the LTK have to end up creating an
1799instance of one of its subclasses. The main responsibilities of subclasses of
1800\type{Refactoring} is to implement template methods for condition checking
1801(\methodwithref{org.eclipse.ltk.core.refactoring.Refactoring}{checkInitialConditions}
1802and
1803\methodwithref{org.eclipse.ltk.core.refactoring.Refactoring}{checkFinalConditions}),
1804in addition to the
1805\methodwithref{org.eclipse.ltk.core.refactoring.Refactoring}{createChange}
07e173d4
EK
1806method that creates and returns an instance of the \type{Change} class.
1807
1808If the refactoring shall support that others participate in it when it is
1809executed, the refactoring has to be a processor-based
1810refactoring\typeref{org.eclipse.ltk.core.refactoring.participants.ProcessorBasedRefactoring}.
1811It then delegates to its given
1812\typewithref{org.eclipse.ltk.core.refactoring.participants}{RefactoringProcessor}
1326eec6
EK
1813for condition checking and change creation. Participating in a refactoring can
1814be useful in cases where the changes done to programming source code affects
1815other related resources in the workspace. This can be names or paths in
1816configuration files, or maybe one would like to perform additional logging of
1817changes done in the workspace.
f041551b
EK
1818
1819\subsubsection{The Change Class}
07e173d4
EK
1820This class is the base class for objects that is responsible for performing the
1821actual workspace transformations in a refactoring. The main responsibilities for
1822its subclasses is to implement the
1823\methodwithref{org.eclipse.ltk.core.refactoring.Change}{perform} and
1824\methodwithref{org.eclipse.ltk.core.refactoring.Change}{isValid} methods. The
1825\method{isValid} method verifies that the change object is valid and thus can be
1826executed by calling its \method{perform} method. The \method{perform} method
1827performs the desired change and returns an undo change that can be executed to
1828reverse the effect of the transformation done by its originating change object.
1829
61420ef7 1830\subsubsection{Executing a Refactoring}\label{executing_refactoring}
07e173d4
EK
1831The life cycle of a refactoring generally follows two steps after creation:
1832condition checking and change creation. By letting the refactoring object be
1833handled by a
1834\typewithref{org.eclipse.ltk.core.refactoring}{CheckConditionsOperation} that
1835in turn is handled by a
1836\typewithref{org.eclipse.ltk.core.refactoring}{CreateChangeOperation}, it is
1837assured that the change creation process is managed in a proper manner.
1838
1839The actual execution of a change object has to follow a detailed life cycle.
1840This life cycle is honored if the \type{CreateChangeOperation} is handled by a
1841\typewithref{org.eclipse.ltk.core.refactoring}{PerformChangeOperation}. If also
1842an undo manager\typeref{org.eclipse.ltk.core.refactoring.IUndoManager} is set
1843for the \type{PerformChangeOperation}, the undo change is added into the undo
1844history.
055dca93 1845
b0e80574 1846\section{Shortcomings}
80663734 1847This section is introduced naturally with a conclusion: The JDT refactoring
5837a41f
EK
1848implementation does not facilitate composition of refactorings.
1849\todo{refine}This section will try to explain why, and also identify other
1850shortcomings of both the usability and the readability of the JDT refactoring
1851source code.
80663734
EK
1852
1853I will begin at the end and work my way toward the composition part of this
1854section.
1855
5837a41f 1856\subsection{Absence of Generics in Eclipse Source Code}
80663734 1857This section is not only concerning the JDT refactoring API, but also large
fe0a4c48 1858quantities of the \name{Eclipse} source code. The code shows a striking absence of the
80663734 1859Java language feature of generics. It is hard to read a class' interface when
5837a41f
EK
1860methods return objects or takes parameters of raw types such as \type{List} or
1861\type{Map}. This sometimes results in having to read a lot of source code to
1862understand what is going on, instead of relying on the available interfaces. In
1863addition, it results in a lot of ugly code, making the use of typecasting more
1864of a rule than an exception.
1865
1866\subsection{Composite Refactorings Will Not Appear as Atomic Actions}
1867
1868\subsubsection{Missing Flexibility from JDT Refactorings}
1869The JDT refactorings are not made with composition of refactorings in mind. When
1870a JDT refactoring is executed, it assumes that all conditions for it to be
1326eec6 1871applied successfully can be found by reading source files that have been
5837a41f
EK
1872persisted to disk. They can only operate on the actual source material, and not
1873(in-memory) copies thereof. This constitutes a major disadvantage when trying to
1326eec6
EK
1874compose refactorings, since if an exception occurs in the middle of a sequence
1875of refactorings, it can leave the project in a state where the composite
1876refactoring was only partially executed. It makes it hard to discard the changes
5837a41f
EK
1877done without monitoring and consulting the undo manager, an approach that is not
1878bullet proof.
1879
1880\subsubsection{Broken Undo History}
1881When designing a composed refactoring that is to be performed as a sequence of
1882refactorings, you would like it to appear as a single change to the workspace.
1883This implies that you would also like to be able to undo all the changes done by
1884the refactoring in a single step. This is not the way it appears when a sequence
1885of JDT refactorings is executed. It leaves the undo history filled up with
1886individual undo actions corresponding to every single JDT refactoring in the
fe0a4c48 1887sequence. This problem is not trivial to handle in \name{Eclipse}
e123ab03 1888\see{hacking_undo_history}.
5837a41f
EK
1889
1890\section{Wishful Thinking}
3727b75b 1891\todoin{???}
80663734 1892
a7514fbd 1893
b0e80574
EK
1894\chapter{Composite Refactorings in Eclipse}
1895
1896\section{A Simple Ad Hoc Model}
fe0a4c48 1897As pointed out in \myref{ch:jdt_refactorings}, the \name{Eclipse} JDT refactoring model
8b6b22c8
EK
1898is not very well suited for making composite refactorings. Therefore a simple
1899model using changer objects (of type \type{RefaktorChanger}) is used as an
fe0a4c48 1900abstraction layer on top of the existing \name{Eclipse} refactorings, instead of
50954fde
EK
1901extending the \typewithref{org.eclipse.ltk.core.refactoring}{Refactoring} class.
1902
1903The use of an additional abstraction layer is a deliberate choice. It is due to
1904the problem of creating a composite
1905\typewithref{org.eclipse.ltk.core.refactoring}{Change} that can handle text
1906changes that interfere with each other. Thus, a \type{RefaktorChanger} may, or
1907may not, take advantage of one or more existing refactorings, but it is always
1908intended to make a change to the workspace.
1909
1910\subsection{A typical \type{RefaktorChanger}}
1911The typical refaktor changer class has two responsibilities, checking
1912preconditions and executing the requested changes. This is not too different
1913from the responsibilities of an LTK refactoring, with the distinction that a
1914refaktor changer also executes the change, while an LTK refactoring is only
1915responsible for creating the object that can later be used to do the job.
1916
1917Checking of preconditions is typically done by an
1918\typewithref{no.uio.ifi.refaktor.analyze.analyzers}{Analyzer}. If the
1919preconditions validate, the upcoming changes are executed by an
1920\typewithref{no.uio.ifi.refaktor.change.executors}{Executor}.
b0e80574
EK
1921
1922\section{The Extract and Move Method Refactoring}
61420ef7
EK
1923%The Extract and Move Method Refactoring is implemented mainly using these
1924%classes:
1925%\begin{itemize}
1926% \item \type{ExtractAndMoveMethodChanger}
1927% \item \type{ExtractAndMoveMethodPrefixesExtractor}
1928% \item \type{Prefix}
1929% \item \type{PrefixSet}
1930%\end{itemize}
1931
1932\subsection{The Building Blocks}
1933This is a composite refactoring, and hence is built up using several primitive
b5c7bb1b
EK
1934refactorings. These basic building blocks are, as its name implies, the
1935\ExtractMethod refactoring\citing{refactoring} and the \MoveMethod
fe0a4c48 1936refactoring\citing{refactoring}. In \name{Eclipse}, the implementations of these
b5c7bb1b 1937refactorings are found in the classes
61420ef7
EK
1938\typewithref{org.eclipse.jdt.internal.corext.refactoring.code}{ExtractMethodRefactoring}
1939and
1940\typewithref{org.eclipse.jdt.internal.corext.refactoring.structure}{MoveInstanceMethodProcessor},
1941where the last class is designed to be used together with the processor-based
1942\typewithref{org.eclipse.ltk.core.refactoring.participants}{MoveRefactoring}.
1943
1944\subsubsection{The ExtractMethodRefactoring Class}
1945This class is quite simple in its use. The only parameters it requires for
1946construction is a compilation
1947unit\typeref{org.eclipse.jdt.core.ICompilationUnit}, the offset into the source
1948code where the extraction shall start, and the length of the source to be
1949extracted. Then you have to set the method name for the new method together with
50954fde 1950its visibility and some not so interesting parameters.
61420ef7
EK
1951
1952\subsubsection{The MoveInstanceMethodProcessor Class}
fe0a4c48
EK
1953For the \refa{Move Method}, the processor requires a little more advanced input than
1954the class for the \refa{Extract Method}. For construction it requires a method
50954fde
EK
1955handle\typeref{org.eclipse.jdt.core.IMethod} for the method that is to be moved.
1956Then the target for the move have to be supplied as the variable binding from a
1957chosen variable declaration. In addition to this, one have to set some
1958parameters regarding setters/getters, as well as delegation.
61420ef7 1959
50954fde
EK
1960To make a working refactoring from the processor, one have to create a
1961\type{MoveRefactoring} with it.
b0e80574 1962
356782a0 1963\subsection{The ExtractAndMoveMethodChanger}
50954fde 1964
61420ef7 1965The \typewithref{no.uio.ifi.refaktor.changers}{ExtractAndMoveMethodChanger}
50954fde
EK
1966class is a subclass of the class
1967\typewithref{no.uio.ifi.refaktor.changers}{RefaktorChanger}. It is responsible
1968for analyzing and finding the best target for, and also executing, a composition
fe0a4c48 1969of the \refa{Extract Method} and \refa{Move Method} refactorings. This particular changer is
50954fde
EK
1970the one of my changers that is closest to being a true LTK refactoring. It can
1971be reworked to be one if the problems with overlapping changes are resolved. The
1972changer requires a text selection and the name of the new method, or else a
1973method name will be generated. The selection has to be of the type
1974\typewithref{no.uio.ifi.refaktor.utils}{CompilationUnitTextSelection}. This
1975class is a custom extension to
1976\typewithref{org.eclipse.jface.text}{TextSelection}, that in addition to the
1977basic offset, length and similar methods, also carry an instance of the
1978underlying compilation unit handle for the selection.
1979
ae138b9d
EK
1980\subsubsection{The
1981 \type{ExtractAndMoveMethodAnalyzer}}\label{extractAndMoveMethodAnalyzer}
50954fde
EK
1982The analysis and precondition checking is done by the
1983\typewithref{no.uio.ifi.refaktor.analyze.analyzers}{ExtractAnd\-MoveMethodAnalyzer}.
1984First is check whether the selection is a valid selection or not, with respect
1985to statement boundaries and that it actually contains any selections. Then it
1986checks the legality of both extracting the selection and also moving it to
95c0f364
EK
1987another class. This checking of is performed by a range of checkers
1988\see{checkers}. If the selection is approved as legal, it is analyzed to find
1989the presumably best target to move the extracted method to.
50954fde
EK
1990
1991For finding the best suitable target the analyzer is using a
1992\typewithref{no.uio.ifi.refaktor.analyze.collectors}{PrefixesCollector} that
ae138b9d
EK
1993collects all the possible candidate targets for the refactoring. All the
1994non-candidates is found by an
50954fde 1995\typewithref{no.uio.ifi.refaktor.analyze.collectors}{UnfixesCollector} that
b8fce5af 1996collects all the targets that will give some kind of error if used. (For
3ab3e132 1997details about the property collectors, see \myref{propertyCollectors}.) All
b8fce5af 1998prefixes (and unfixes) are represented by a
a6415293
EK
1999\typewithref{no.uio.ifi.refaktor.extractors}{Prefix}, and they are collected
2000into sets of prefixes. The safe prefixes is found by subtracting from the set of
b8fce5af 2001candidate prefixes the prefixes that is enclosing any of the unfixes. A prefix
a6415293
EK
2002is enclosing an unfix if the unfix is in the set of its sub-prefixes. As an
2003example, \texttt{``a.b''} is enclosing \texttt{``a''}, as is \texttt{``a''}. The
2004safe prefixes is unified in a \type{PrefixSet}. If a prefix has only one
2005occurrence, and is a simple expression, it is considered unsuitable as a move
2006target. This occurs in statements such as \texttt{``a.foo()''}. For such
2007statements it bares no meaning to extract and move them. It only generates an
2008extra method and the calling of it.
50954fde 2009
fbaa89ce
EK
2010The most suitable target for the refactoring is found by finding the prefix with
2011the most occurrences. If two prefixes have the same occurrence count, but they
2012differ in length, the longest of them is chosen.
2013
0f6e45f8 2014\todoin{Clean up sections/subsections.}
50954fde 2015
c8088eec
EK
2016\subsubsection{The
2017 \type{ExtractAndMoveMethodExecutor}}\label{extractAndMoveMethodExecutor}
50954fde
EK
2018If the analysis finds a possible target for the composite refactoring, it is
2019executed by an
2020\typewithref{no.uio.ifi.refaktor.change.executors}{ExtractAndMoveMethodExecutor}.
2021It is composed of the two executors known as
2022\typewithref{no.uio.ifi.refaktor.change.executors}{ExtractMethodRefactoringExecutor}
2023and
2024\typewithref{no.uio.ifi.refaktor.change.executors}{MoveMethodRefactoringExecutor}.
2025The \type{ExtractAndMoveMethodExecutor} is responsible for gluing the two
3727b75b 2026together by feeding the \type{MoveMethod\-RefactoringExecutor} with the
222d172b
EK
2027resources needed after executing the extract method refactoring.
2028%\see{postExtractExecution}.
50954fde
EK
2029
2030\subsubsection{The \type{ExtractMethodRefactoringExecutor}}
2031This executor is responsible for creating and executing an instance of the
2032\type{ExtractMethodRefactoring} class. It is also responsible for collecting
2033some post execution resources that can be used to find the method handle for the
2034extracted method, as well as information about its parameters, including the
2035variable they originated from.
2036
2037\subsubsection{The \type{MoveMethodRefactoringExecutor}}
2038This executor is responsible for creating and executing an instance of the
2039\type{MoveRefactoring}. The move refactoring is a processor-based refactoring,
fe0a4c48 2040and for the \refa{Move Method} refactoring it is the \type{MoveInstanceMethodProcessor}
50954fde
EK
2041that is used.
2042
2043The handle for the method to be moved is found on the basis of the information
fe0a4c48 2044gathered after the execution of the \refa{Extract Method} refactoring. The only
50954fde
EK
2045information the \type{ExtractMethodRefactoring} is sharing after its execution,
2046regarding find the method handle, is the textual representation of the new
2047method signature. Therefore it must be parsed, the strings for types of the
2048parameters must be found and translated to a form that can be used to look up
2049the method handle from its type handle. They have to be on the unresolved
2050form.\todo{Elaborate?} The name for the type is found from the original
2051selection, since an extracted method must end up in the same type as the
2052originating method.
2053
fe0a4c48 2054When analyzing a selection prior to performing the \refa{Extract Method} refactoring, a
50954fde
EK
2055target is chosen. It has to be a variable binding, so it is either a field or a
2056local variable/parameter. If the target is a field, it can be used with the
2057\type{MoveInstanceMethodProcessor} as it is, since the extracted method still is
2058in its scope. But if the target is local to the originating method, the target
2059that is to be used for the processor must be among its parameters. Thus the
2060target must be found among the extracted method's parameters. This is done by
2061finding the parameter information object that corresponds to the parameter that
2062was declared on basis of the original target's variable when the method was
2063extracted. (The extracted method must take one such parameter for each local
2064variable that is declared outside the selection that is extracted.) To match the
2065original target with the correct parameter information object, the key for the
2066information object is compared to the key from the original target's binding.
2067The source code must then be parsed to find the method declaration for the
2068extracted method. The new target must be found by searching through the
2069parameters of the declaration and choose the one that has the same type as the
2070old binding from the parameter information object, as well as the same name that
2071is provided by the parameter information object.
2072
2073
356782a0
EK
2074\subsection{The
2075SearchBasedExtractAndMoveMethodChanger}\label{searchBasedExtractAndMoveMethodChanger}
c8088eec
EK
2076The
2077\typewithref{no.uio.ifi.refaktor.change.changers}{SearchBasedExtractAndMoveMethodChanger}
2078is a changer whose purpose is to automatically analyze a method, and execute the
2079\ExtractAndMoveMethod refactoring on it if it is a suitable candidate for the
2080refactoring.
2081
2082First, the \typewithref{no.uio.ifi.refaktor.analyze.analyzers}{SearchBasedExtractAndMoveMethodAnalyzer} is used
2083to analyze the method. If the method is found to be a candidate, the result from
2084the analysis is fed to the \type{ExtractAndMoveMethodExecutor}, whose job is to
2085execute the refactoring \see{extractAndMoveMethodExecutor}.
2086
2087\subsubsection{The SearchBasedExtractAndMoveMethodAnalyzer}
2088This analyzer is responsible for analyzing all the possible text selections of a
2089method and then choose the best result out of the analysis results that is, by
2090the analyzer, considered to be the potential candidates for the Extract and Move
2091Method refactoring.
2092
2093Before the analyzer is able to work with the text selections of a method, it
2094needs to generate them. To do this, it parses the method to obtain a
2095\type{MethodDeclaration} for it \see{astEclipse}. Then there is a statement
2096lists creator that creates statements lists of the different groups of
2097statements in the body of the method declaration. A text selections generator
2098generates text selections of all the statement lists for the analyzer to work
2099with.
2100
2101\paragraph{The statement lists creator}
2102is responsible for generating lists of statements for all the possible levels of
2103statements in the method. The statement lists creator is implemented as an AST
2104visitor \see{astVisitor}. It generates lists of statements by visiting all the
2105blocks in the method declaration and stores their statements in a collection of
2106statement lists. In addition, it visits all of the other statements that can
2107have a statement as a child, such as the different control structures and the
2108labeled statement.
2109
2110The switch statement is the only kind of statement that is not straight forward
2111to obtain the child statements from. It stores all of its children in a flat
2112list. Its switch case statements are included in this list. This means that
2113there are potential statement lists between all of these case statements. The
2114list of statements from a switch statement is therefore traversed, and the
2115statements between the case statements are grouped as separate lists.
2116
2117There is an example of how the statement lists creator would generate lists for
2118a simple method in \myref{lst:statementListsExample}.
2119
2120\begin{listing}[h]
2121\def\charwidth{5.7pt}
2122\def\indent{4*\charwidth}
2123\def\lineheight{\baselineskip}
2124\def\mintedtop{\lineheight}
2125
2126\begin{tikzpicture}[overlay, yscale=-1]
2127 \tikzstyle{overlaybox}=[fill=lightgray,opacity=0.2]
2128 \draw[overlaybox] (0,\mintedtop+\lineheight) rectangle
2129 +(22*\charwidth,10*\lineheight);
2130 \draw[overlaybox] (\indent,\mintedtop+2*\lineheight) rectangle
2131 +(13*\charwidth,\lineheight);
2132 \draw[overlaybox] (2*\indent,\mintedtop+6*\lineheight) rectangle
2133 +(13*\charwidth,2*\lineheight);
2134 \draw[overlaybox] (2*\indent,\mintedtop+9*\lineheight) rectangle
2135 +(13*\charwidth,\lineheight);
2136\end{tikzpicture}
2137\begin{minted}{java}
2138void method() {
2139 if (bool)
2140 b.bar();
2141
2142 switch (val) {
2143 case 1:
2144 b.foo();
2145 c.foo();
2146 default:
2147 c.foo();
2148 }
2149}
2150\end{minted}
2151\caption{Example of how the statement lists creator would group a simple method
2152into lists of statements. Each highlighted rectangle represents a list.}
2153\label{lst:statementListsExample}
2154\end{listing}
2155
2156\paragraph{The text selections generator} generates text selections for each
2157list of statements from the statement lists creator. Conceptually, the generator
ae138b9d
EK
2158generates a text selection for every possible ordered \todo{make clearer}
2159combination of statements in a list. For a list of statements, the boundary
2160statements span out a text selection. This means that there are many different
2161lists that could span out the same selection.
c8088eec
EK
2162
2163In practice, the text selections are calculated by only one traversal of the
2164statement list. There is a set of generated text selections. For each statement,
2165there is created a temporary set of selections, in addition to a text selection
2166based on the offset and length of the statement. This text selection is added to
2167the temporary set. Then the new selection is added with every selection from the
2168set of generated text selections. These new selections are added to the
2169temporary set. Then the temporary set of selections is added to the set of
2170generated text selections. The result of adding two text selections is a new
2171text selection spanned out by the two addends.
2172
0fa64de5
EK
2173\begin{listing}[h]
2174\def\charwidth{5.7pt}
2175\def\indent{4*\charwidth}
2176\def\lineheight{\baselineskip}
2177\def\mintedtop{\lineheight}
2178
2179\begin{tikzpicture}[overlay, yscale=-1]
2180 \tikzstyle{overlaybox}=[fill=lightgray,opacity=0.2]
2181
2182 \draw[overlaybox] (2*\charwidth,\mintedtop) rectangle
2183 +(18*\charwidth,\lineheight);
2184
2185 \draw[overlaybox] (2*\charwidth,\mintedtop+\lineheight) rectangle
2186 +(18*\charwidth,\lineheight);
2187
2188 \draw[overlaybox] (2*\charwidth,\mintedtop+3*\lineheight) rectangle
2189 +(18*\charwidth,\lineheight);
2190
2191 \draw[overlaybox] (\indent-3*\charwidth,\mintedtop) rectangle
2192 +(20*\charwidth,2*\lineheight);
2193
2194 \draw[overlaybox] (3*\charwidth,\mintedtop+\lineheight) rectangle
2195 +(16*\charwidth,3*\lineheight);
2196
2197 \draw[overlaybox] (\indent,\mintedtop) rectangle
2198 +(14*\charwidth,4*\lineheight);
2199\end{tikzpicture}
2200\begin{minted}{java}
2201 statement one;
2202 statement two;
2203 ...
2204 statement k;
2205\end{minted}
2206\caption{Example of how the text selections generator would generate text
2207 selections based on a lists of statements. Each highlighted rectangle
2208represents a text selection.}
2209\label{lst:textSelectionsExample}
2210\end{listing}
021508ad
EK
2211\todoin{fix \myref{lst:textSelectionsExample}? Text only? All
2212sub-sequences\ldots}
0fa64de5 2213
ae138b9d
EK
2214\paragraph{Finding the candidate} for the refactoring is done by analyzing all
2215the generated text selection with the \type{ExtractAndMoveMethodAnalyzer}
2216\see{extractAndMoveMethodAnalyzer}. If the analyzer generates a useful result,
2217an \type{ExtractAndMoveMethodCandidate} is created from it, that is kept in a
2218list of potential candidates. If no candidates are found, the
2219\type{NoTargetFoundException} is thrown.
2220
2221Since only one of the candidates can be chosen, the analyzer must sort out which
2222candidate to choose. The sorting is done by the static \method{sort} method of
2223\type{Collections}. The comparison in this sorting is done by an
2224\type{ExtractAndMoveMethodCandidateComparator}.
2225\todoin{Write about the
2226ExtractAndMoveMethodCandidateComparator/FavorNoUnfixesCandidateComparator}
2227
2228\paragraph{The complexity} of how many text selections that needs to be analyzed
2229for a total of $n$ statements is bounded by $O(n^2)$.
2230
3471cd15 2231\begin{theorem}
ae138b9d
EK
2232The number of text selections that need to be analyzed for each list of
2233statements of length $n$, is exactly
2234
3471cd15 2235\begin{equation*}
ae138b9d
EK
2236 \sum_{i=1}^{n} i = \frac{n(n+1)}{2}
2237 \label{eq:complexityStatementList}
3471cd15
EK
2238\end{equation*}
2239\label{thm:numberOfTextSelection}
2240\end{theorem}
ae138b9d
EK
2241
2242\begin{proof}
2243 For $n=1$ this is trivial: $\frac{1(1+1)}{2} = \frac{2}{2} = 1$. One statement
2244 equals one selection.
2245
0cc6a67d
EK
2246 For $n=2$, you get one text selection for the first statement, one selection
2247 for the second statement, and one selection for the two of them combined.
2248 This equals three selections. $\frac{2(2+1)}{2} = \frac{6}{2} = 3$.
ae138b9d
EK
2249
2250 For $n=3$, you get 3 selections for the two first statements, as in the case
2251 where $n=2$. In addition you get one selection for the third statement itself,
2252 and two more statements for the combinations of it with the two previous
2253 statements. This equals six selections. $\frac{3(3+1)}{2} = \frac{12}{2} = 6$.
2254
2255 Assume that for $n=k$ there exists $\frac{k(k+1)}{2}$ text selections. Then we
2256 want to add selections for another statement, following the previous $k$
2257 statements. So, for $n=k+1$, we get one additional selection for the statement
2258 itself. Then we get one selection for each pair of the new selection and the
2259 previous $k$ statements. So the total number of selections will be the number
2260 of already generated selections, plus $k$ for every pair, plus one for the
2261 statement itself: $\frac{k(k+1)}{2} + k +
2262 1 = \frac{k(k+1)+2k+2}{2} = \frac{k(k+1)+2(k+1)}{2} = \frac{(k+1)(k+2)}{2} =
2263 \frac{(k+1)((k+1)+1)}{2} = \sum_{i=1}^{k+1} i$
2264\end{proof}
2265
0cc6a67d
EK
2266%\definition{A \emph{body of statements} is a sequence of statements where every
2267%statement may have sub-statements.}
2268\todoin{Define ``body of statements''?}
2269
3471cd15
EK
2270\begin{theorem}
2271 The number of text selections for a body of statements is maximized if all the
2272 statements are at the same level.
2273 \label{thm:textSelectionsMaximized}
2274\end{theorem}
2275
2276\begin{proof}
3ab3e132 2277 Assume we have a body of, in total, $k$ statements. Let
0cc6a67d
EK
2278 $\{l,\ldots,m,(k-l-\ldots-m)\}$ be the lengths of the lists of statements in
2279 the body, with $l+\ldots+m<k \Rightarrow l,\ldots,m<k$.
3471cd15
EK
2280
2281 Then, the number of text selections that are generated for the $k$ statements
2282 is
2283
2284 {
2285 \small
2286 \begin{align*}
0cc6a67d 2287 \frac{(k-l-\ldots-m)((k-l-\ldots-m)+ 1)}{2} + \frac{l(l+1)}{2} + \ldots +
3471cd15 2288 \frac{m(m+1)}{2} = \\
0cc6a67d
EK
2289 \frac{k^2 - 2kl - \ldots - 2km + l^2 + \ldots + m^2 + k - l - \ldots - m}{2}
2290 + \frac{l^2+l}{2} + \ldots + \frac{m^2+m}{2} = \\
2291 \frac{k^2 + k + 2l^2 - 2kl + \ldots + 2m^2 - 2km}{2}
3471cd15
EK
2292 \end{align*}
2293 }
2294
0cc6a67d 2295 \noindent It then remains to show that this inequality holds:
3471cd15
EK
2296
2297 \begin{align*}
0cc6a67d 2298 \frac{k^2 + k + 2l^2 - 2kl + \ldots + 2m^2 - 2km}{2} < \frac{k(k+1)}{2} =
3471cd15
EK
2299 \frac{k^2 + k}{2}
2300 \end{align*}
2301
0cc6a67d
EK
2302 \noindent By multiplication by $2$ on both sides, and by removing the equal
2303 parts, we get
3471cd15
EK
2304
2305 \begin{align*}
0cc6a67d 2306 2l^2 - 2kl + \ldots + 2m^2 - 2km < 0
3471cd15
EK
2307 \end{align*}
2308
0cc6a67d 2309 Since $l,\ldots,m<k$, we have that $\forall i \in \{l,\ldots,m\} : 2ki > 2i^2$,
3471cd15
EK
2310 so all the pairs of parts on the form $2i^2-2ki$ are negative. In sum, the
2311 inequality holds.
2312
2313\end{proof}
ae138b9d
EK
2314
2315Therefore, the complexity for the number of selections that needs to be analyzed
3471cd15 2316for a body of $n$ statements is $O\bigl(\frac{n(n+1)}{2}\bigr) = O(n^2)$.
c8088eec 2317
356782a0 2318
222d172b 2319\begin{comment}
50954fde 2320\subsection{Finding the IMethod}\label{postExtractExecution}
356782a0 2321\todoin{Rename section. Write??}
222d172b 2322\end{comment}
61420ef7 2323
61420ef7 2324
b0e80574 2325\subsection{The Prefix Class}
a6415293
EK
2326This class exists mainly for holding data about a prefix, such as the expression
2327that the prefix represents and the occurrence count of the prefix within a
2328selection. In addition to this, it has some functionality such as calculating
2329its sub-prefixes and intersecting it with another prefix. The definition of the
2330intersection between two prefixes is a prefix representing the longest common
2331expression between the two.
2332
b0e80574 2333\subsection{The PrefixSet Class}
a6415293
EK
2334A prefix set holds elements of type \type{Prefix}. It is implemented with the
2335help of a \typewithref{java.util}{HashMap} and contains some typical set
2336operations, but it does not implement the \typewithref{java.util}{Set}
2337interface, since the prefix set does not need all of the functionality a
2338\type{Set} requires to be implemented. In addition It needs some other
2339functionality not found in the \type{Set} interface. So due to the relatively
2340limited use of prefix sets, and that it almost always needs to be referenced as
2341such, and not a \type{Set<Prefix>}, it remains as an ad hoc solution to a
2342concrete problem.
2343
2344There are two ways adding prefixes to a \type{PrefixSet}. The first is through
2345its \method{add} method. This works like one would expect from a set. It adds
2346the prefix to the set if it does not already contain the prefix. The other way
2347is to \emph{register} the prefix with the set. When registering a prefix, if the
2348set does not contain the prefix, it is just added. If the set contains the
2349prefix, its count gets incremented. This is how the occurrence count is handled.
2350
2351The prefix set also computes the set of prefixes that is not enclosing any
2352prefixes of another set. This is kind of a set difference operation only for
2353enclosing prefixes.
b0e80574 2354
5837a41f
EK
2355\subsection{Hacking the Refactoring Undo
2356History}\label{hacking_undo_history}
a6415293 2357\todoin{Where to put this section?}
8fae7b44
EK
2358
2359As an attempt to make multiple subsequent changes to the workspace appear as a
2360single action (i.e. make the undo changes appear as such), I tried to alter
2361the undo changes\typeref{org.eclipse.ltk.core.refactoring.Change} in the history
2362of the refactorings.
2363
2364My first impulse was to remove the, in this case, last two undo changes from the
f041551b 2365undo manager\typeref{org.eclipse.ltk.core.refactoring.IUndoManager} for the
fe0a4c48 2366\name{Eclipse} refactorings, and then add them to a composite
8fae7b44
EK
2367change\typeref{org.eclipse.ltk.core.refactoring.CompositeChange} that could be
2368added back to the manager. The interface of the undo manager does not offer a
2369way to remove/pop the last added undo change, so a possible solution could be to
4cb06723
EK
2370decorate\citing{designPatterns} the undo manager, to intercept and collect the
2371undo changes before delegating to the \method{addUndo}
f041551b 2372method\methodref{org.eclipse.ltk.core.refactoring.IUndoManager}{addUndo} of the
8fae7b44
EK
2373manager. Instead of giving it the intended undo change, a null change could be
2374given to prevent it from making any changes if run. Then one could let the
2375collected undo changes form a composite change to be added to the manager.
2376
2377There is a technical challenge with this approach, and it relates to the undo
2378manager, and the concrete implementation
2379UndoManager2\typeref{org.eclipse.ltk.internal.core.refactoring.UndoManager2}.
2380This implementation is designed in a way that it is not possible to just add an
2381undo change, you have to do it in the context of an active
2382operation\typeref{org.eclipse.core.commands.operations.TriggeredOperations}.
2383One could imagine that it might be possible to trick the undo manager into
2384believing that you are doing a real change, by executing a refactoring that is
2385returning a kind of null change that is returning our composite change of undo
2386refactorings when it is performed.
2387
2388Apart from the technical problems with this solution, there is a functional
2389problem: If it all had worked out as planned, this would leave the undo history
2390in a dirty state, with multiple empty undo operations corresponding to each of
2391the sequentially executed refactoring operations, followed by a composite undo
2392change corresponding to an empty change of the workspace for rounding of our
2393composite refactoring. The solution to this particular problem could be to
2394intercept the registration of the intermediate changes in the undo manager, and
2395only register the last empty change.
2396
2397Unfortunately, not everything works as desired with this solution. The grouping
2398of the undo changes into the composite change does not make the undo operation
2399appear as an atomic operation. The undo operation is still split up into
2400separate undo actions, corresponding to the change done by its originating
2401refactoring. And in addition, the undo actions has to be performed separate in
2402all the editors involved. This makes it no solution at all, but a step toward
2403something worse.
2404
2405There might be a solution to this problem, but it remains to be found. The
2406design of the refactoring undo management is partly to be blamed for this, as it
2407it is to complex to be easily manipulated.
2408
b0e80574 2409
0d7fbd88 2410
2f2080fb 2411
03674629 2412\chapter{Analyzing Source Code in Eclipse}
5308274d 2413
356782a0 2414\section{The Java model}\label{javaModel}
fe0a4c48 2415The Java model of \name{Eclipse} is its internal representation of a Java project. It
5308274d 2416is light-weight, and has only limited possibilities for manipulating source
fe0a4c48 2417code. It is typically used as a basis for the Package Explorer in \name{Eclipse}.
5308274d
EK
2418
2419The elements of the Java model is only handles to the underlying elements. This
2420means that the underlying element of a handle does not need to actually exist.
2421Hence the user of a handle must always check that it exist by calling the
2422\method{exists} method of the handle.
2423
356782a0 2424The handles with descriptions is listed in \myref{tab:javaModel}.
4e468834
EK
2425
2426\begin{table}[h]
2427 \centering
2428
2429 \newcolumntype{L}[1]{>{\hsize=#1\hsize\raggedright\arraybackslash}X}%
2430 % sum must equal number of columns (3)
2431 \begin{tabularx}{\textwidth}{| L{0.7} | L{1.1} | L{1.2} |}
2432 \hline
2433 \textbf{Project Element} & \textbf{Java Model element} &
2434 \textbf{Description} \\
2435 \hline
2436 Java project & \type{IJavaProject} & The Java project which contains all other objects. \\
2437 \hline
2438 Source folder /\linebreak[2] binary folder /\linebreak[3] external library &
2439 \type{IPackageFragmentRoot} & Hold source or binary files, can be a folder
2440 or a library (zip / jar file). \\
2441 \hline
2442 Each package & \type{IPackageFragment} & Each package is below the
2443 \type{IPackageFragmentRoot}, sub-packages are not leaves of the package,
2444 they are listed directed under \type{IPackageFragmentRoot}. \\
2445 \hline
2446 Java Source file & \type{ICompilationUnit} & The Source file is always below
2447 the package node. \\
2448 \hline
2449 Types /\linebreak[2] Fields /\linebreak[3] Methods & \type{IType} /
2450 \linebreak[0]
2451 \type{IField} /\linebreak[3] \type{IMethod} & Types, fields and methods. \\
2452 \hline
2453 \end{tabularx}
051cf433
EK
2454 \caption{The elements of the Java Model. {\footnotesize Taken from
2455 \url{http://www.vogella.com/tutorials/EclipseJDT/article.html}}}
356782a0 2456 \label{tab:javaModel}
4e468834
EK
2457\end{table}
2458
2459The hierarchy of the Java Model is shown in \myref{fig:javaModel}.
2460
5308274d
EK
2461\begin{figure}[h]
2462 \centering
2463 \begin{tikzpicture}[%
2464 grow via three points={one child at (0,-0.7) and
2465 two children at (0,-0.7) and (0,-1.4)},
2466 edge from parent path={(\tikzparentnode.south west)+(0.5,0) |-
2467 (\tikzchildnode.west)}]
2468 \tikzstyle{every node}=[draw=black,thick,anchor=west]
2469 \tikzstyle{selected}=[draw=red,fill=red!30]
2470 \tikzstyle{optional}=[dashed,fill=gray!50]
2471 \node {\type{IJavaProject}}
2472 child { node {\type{IPackageFragmentRoot}}
2473 child { node {\type{IPackageFragment}}
2474 child { node {\type{ICompilationUnit}}
2475 child { node {\type{IType}}
2476 child { node {\type{\{ IType \}*}}
2477 child { node {\type{\ldots}}}
2478 }
2479 child [missing] {}
2480 child { node {\type{\{ IField \}*}}}
2481 child { node {\type{IMethod}}
2482 child { node {\type{\{ IType \}*}}
2483 child { node {\type{\ldots}}}
2484 }
2485 }
2486 child [missing] {}
2487 child [missing] {}
2488 child { node {\type{\{ IMethod \}*}}}
2489 }
2490 child [missing] {}
2491 child [missing] {}
2492 child [missing] {}
2493 child [missing] {}
2494 child [missing] {}
2495 child [missing] {}
2496 child [missing] {}
2497 child { node {\type{\{ IType \}*}}}
2498 }
2499 child [missing] {}
2500 child [missing] {}
2501 child [missing] {}
2502 child [missing] {}
2503 child [missing] {}
2504 child [missing] {}
2505 child [missing] {}
2506 child [missing] {}
2507 child [missing] {}
2508 child { node {\type{\{ ICompilationUnit \}*}}}
2509 }
2510 child [missing] {}
2511 child [missing] {}
2512 child [missing] {}
2513 child [missing] {}
2514 child [missing] {}
2515 child [missing] {}
2516 child [missing] {}
2517 child [missing] {}
2518 child [missing] {}
2519 child [missing] {}
2520 child [missing] {}
2521 child { node {\type{\{ IPackageFragment \}*}}}
2522 }
2523 child [missing] {}
2524 child [missing] {}
2525 child [missing] {}
2526 child [missing] {}
2527 child [missing] {}
2528 child [missing] {}
2529 child [missing] {}
2530 child [missing] {}
2531 child [missing] {}
2532 child [missing] {}
2533 child [missing] {}
2534 child [missing] {}
2535 child [missing] {}
2536 child { node {\type{\{ IPackageFragmentRoot \}*}}}
2537 ;
2538 \end{tikzpicture}
fe0a4c48 2539 \caption{The Java model of \name{Eclipse}. ``\type{\{ SomeElement \}*}'' means
5308274d
EK
2540 \type{SomeElement} zero or more times. For recursive structures,
2541 ``\type{\ldots}'' is used.}
2542 \label{fig:javaModel}
2543\end{figure}
2544
3ab3e132 2545\section{The Abstract Syntax Tree}
fe0a4c48 2546\name{Eclipse} is following the common paradigm of using an abstract syntax tree for
03674629
EK
2547source code analysis and manipulation.
2548
03674629
EK
2549When parsing program source code into something that can be used as a foundation
2550for analysis, the start of the process follows the same steps as in a compiler.
3ab3e132 2551This is all natural, because the way a compiler analyzes code is no different
03674629
EK
2552from how source manipulation programs would do it, except for some properties of
2553code that is analyzed in the parser, and that they may be differing in what
4e468834 2554kinds of properties they analyze. Thus the process of translation source code
03674629 2555into a structure that is suitable for analyzing, can be seen as a kind of
65e213db
EK
2556interrupted compilation process \see{fig:interruptedCompilationProcess}.
2557
2558\begin{figure}[h]
2559 \centering
2560 \tikzset{
c876d1a4 2561 base/.style={anchor=north, align=center, rectangle, minimum height=1.4cm},
65e213db 2562 basewithshadow/.style={base, drop shadow, fill=white},
c876d1a4
EK
2563 outlined/.style={basewithshadow, draw, rounded corners, minimum
2564 width=0.4cm},
2565 primary/.style={outlined, font=\bfseries},
65e213db 2566 dashedbox/.style={outlined, dashed},
62563950
EK
2567 arrowpath/.style={black, align=center, font=\small},
2568 processarrow/.style={arrowpath, ->, >=angle 90, shorten >=1pt},
65e213db 2569 }
62563950 2570 \begin{tikzpicture}[node distance=1.3cm and 3cm, scale=1, every
c876d1a4 2571 node/.style={transform shape}]
62563950
EK
2572 \node[base](AuxNode1){\small source code};
2573 \node[primary, right=of AuxNode1, xshift=-2.5cm](Scanner){Scanner};
c876d1a4 2574 \node[primary, right=of Scanner, xshift=0.5cm](Parser){Parser};
72e039dc
EK
2575 \node[dashedbox, below=of Parser](SemanticAnalyzer){Semantic\\Analyzer};
2576 \node[dashedbox, left=of SemanticAnalyzer](SourceCodeOptimizer){Source
2577 Code\\Optimizer};
2578 \node[dashedbox, below=of SourceCodeOptimizer
c876d1a4 2579 ](CodeGenerator){Code\\Generator};
72e039dc
EK
2580 \node[dashedbox, right=of CodeGenerator](TargetCodeOptimizer){Target
2581 Code\\Optimizer};
2582 \node[base, right=of TargetCodeOptimizer](AuxNode2){};
c876d1a4 2583
62563950
EK
2584 \draw[processarrow](AuxNode1) -- (Scanner);
2585
2586 \path[arrowpath] (Scanner) -- node [sloped](tokens){tokens}(Parser);
2587 \draw[processarrow](Scanner) -- (tokens) -- (Parser);
2588
2589 \path[arrowpath] (Parser) -- node (syntax){syntax
2590 tree}(SemanticAnalyzer);
2591 \draw[processarrow](Parser) -- (syntax) -- (SemanticAnalyzer);
2592
2593 \path[arrowpath] (SemanticAnalyzer) -- node
2594 [sloped](annotated){annotated\\tree}(SourceCodeOptimizer);
2595 \draw[processarrow, dashed](SemanticAnalyzer) -- (annotated) --
2596 (SourceCodeOptimizer);
2597
2598 \path[arrowpath] (SourceCodeOptimizer) -- node
2599 (intermediate){intermediate code}(CodeGenerator);
2600 \draw[processarrow, dashed](SourceCodeOptimizer) -- (intermediate) --
2601 (CodeGenerator);
2602
2603 \path[arrowpath] (CodeGenerator) -- node [sloped](target1){target
c876d1a4 2604 code}(TargetCodeOptimizer);
62563950
EK
2605 \draw[processarrow, dashed](CodeGenerator) -- (target1) --
2606 (TargetCodeOptimizer);
2607
2608 \path[arrowpath](TargetCodeOptimizer) -- node [sloped](target2){target
c876d1a4 2609 code}(AuxNode2);
62563950 2610 \draw[processarrow, dashed](TargetCodeOptimizer) -- (target2) (AuxNode2);
65e213db 2611 \end{tikzpicture}
72e039dc 2612 \caption{Interrupted compilation process. {\footnotesize (Full compilation
52da2102
EK
2613 process borrowed from \emph{Compiler construction: principles and practice}
2614 by Kenneth C. Louden\citing{louden1997}.)}}
65e213db
EK
2615 \label{fig:interruptedCompilationProcess}
2616\end{figure}
2617
03674629
EK
2618The process starts with a \emph{scanner}, or lexer. The job of the scanner is to
2619read the source code and divide it into tokens for the parser. Therefore, it is
2620also sometimes called a tokenizer. A token is a logical unit, defined in the
2621language specification, consisting of one or more consecutive characters. In
3ab3e132 2622the Java language the tokens can for instance be the \var{this} keyword, a curly
03674629 2623bracket \var{\{} or a \var{nameToken}. It is recognized by the scanner on the
3ab3e132 2624basis of something equivalent of a regular expression. This part of the process
03674629
EK
2625is often implemented with the use of a finite automata. In fact, it is common to
2626specify the tokens in regular expressions, that in turn is translated into a
2627finite automata lexer. This process can be automated.
2628
3ab3e132 2629The program component used to translate a stream of tokens into something
03674629
EK
2630meaningful, is called a parser. A parser is fed tokens from the scanner and
2631performs an analysis of the structure of a program. It verifies that the syntax
2632is correct according to the grammar rules of a language, that is usually
2633specified in a context-free grammar, and often in a variant of the
fe0a4c48 2634\name{Backus--Naur
03674629
EK
2635Form}\footnote{\url{https://en.wikipedia.org/wiki/Backus-Naur\_Form}}. The
2636result coming from the parser is in the form of an \emph{Abstract Syntax Tree},
2637AST for short. It is called \emph{abstract}, because the structure does not
2638contain all of the tokens produced by the scanner. It only contain logical
2639constructs, and because it forms a tree, all kinds of parentheses and brackets
2640are implicit in the structure. It is this AST that is used when performing the
2641semantic analysis of the code.
2642
2643As an example we can think of the expression \code{(5 + 7) * 2}. The root of
fe0a4c48 2644this tree would in \name{Eclipse} be an \type{InfixExpression} with the operator
d11bcf4d
EK
2645\var{TIMES}, and a left operand that is also an \type{InfixExpression} with the
2646operator \var{PLUS}. The left operand \type{InfixExpression}, has in turn a left
2647operand of type \type{NumberLiteral} with the value \var{``5''} and a right
2648operand \type{NumberLiteral} with the value \var{``7''}. The root will have a
2649right operand of type \type{NumberLiteral} and value \var{``2''}. The AST for
2650this expression is illustrated in \myref{fig:astInfixExpression}.
2651
3ab3e132
EK
2652Contrary to the Java Model, an abstract syntax tree is a heavy-weight
2653representation of source code. It contains information about properties like
2654type bindings for variables and variable bindings for names.
4e468834
EK
2655
2656
d11bcf4d
EK
2657\begin{figure}[h]
2658 \centering
a1d68d95 2659 \begin{tikzpicture}[scale=0.8]
894dce0d 2660 \tikzset{level distance=40pt}
a1d68d95
EK
2661 \tikzset{sibling distance=5pt}
2662 \tikzstyle{thescale}=[scale=0.8]
2663 \tikzset{every tree node/.style={align=center}}
d11bcf4d 2664 \tikzset{edge from parent/.append style={thick}}
a1d68d95
EK
2665 \tikzstyle{inode}=[rectangle,rounded corners,draw,fill=lightgray,drop
2666 shadow,align=center]
2667 \tikzset{every internal node/.style={inode}}
894dce0d 2668 \tikzset{every leaf node/.style={draw=none,fill=none}}
d11bcf4d 2669
894dce0d
EK
2670 \Tree [.\type{InfixExpression} [.\type{InfixExpression}
2671 [.\type{NumberLiteral} \var{``5''} ] [.\type{Operator} \var{PLUS} ]
2672 [.\type{NumberLiteral} \var{``7''} ] ]
d11bcf4d
EK
2673 [.\type{Operator} \var{TIMES} ]
2674 [.\type{NumberLiteral} \var{``2''} ]
2675 ]
2676 \end{tikzpicture}
894dce0d 2677 \caption{The abstract syntax tree for the expression \code{(5 + 7) * 2}.}
d11bcf4d
EK
2678 \label{fig:astInfixExpression}
2679\end{figure}
03674629 2680
c8088eec 2681\subsection{The AST in Eclipse}\label{astEclipse}
fe0a4c48 2682In \name{Eclipse}, every node in the AST is a child of the abstract superclass
03674629
EK
2683\typewithref{org.eclipse.jdt.core.dom}{ASTNode}. Every \type{ASTNode}, among a
2684lot of other things, provides information about its position and length in the
2685source code, as well as a reference to its parent and to the root of the tree.
2686
2687The root of the AST is always of type \type{CompilationUnit}. It is not the same
2688as an instance of an \type{ICompilationUnit}, which is the compilation unit
894dce0d 2689handle of the Java model. The children of a \type{CompilationUnit} is an
03674629
EK
2690optional \type{PackageDeclaration}, zero or more nodes of type
2691\type{ImportDecaration} and all its top-level type declarations that has node
2692types \type{AbstractTypeDeclaration}.
2693
2694An \type{AbstractType\-Declaration} can be one of the types
2695\type{AnnotationType\-Declaration}, \type{Enum\-Declaration} or
2696\type{Type\-Declaration}. The children of an \type{AbstractType\-Declaration}
2697must be a subtype of a \type{BodyDeclaration}. These subtypes are:
2698\type{AnnotationTypeMember\-Declaration}, \type{EnumConstant\-Declaration},
2699\type{Field\-Declaration}, \type{Initializer} and \type{Method\-Declaration}.
2700
2701Of the body declarations, the \type{Method\-Declaration} is the most interesting
2702one. Its children include lists of modifiers, type parameters, parameters and
2703exceptions. It has a return type node and a body node. The body, if present, is
2704of type \type{Block}. A \type{Block} is itself a \type{Statement}, and its
2705children is a list of \type{Statement} nodes.
2706
2707There are too many types of the abstract type \type{Statement} to list up, but
2708there exists a subtype of \type{Statement} for every statement type of Java, as
2709one would expect. This also applies to the abstract type \type{Expression}.
2710However, the expression \type{Name} is a little special, since it is both used
2711as an operand in compound expressions, as well as for names in type declarations
2712and such.
2713
fe0a4c48 2714There is an overview of some of the structure of an \name{Eclipse} AST in
94deee9e
EK
2715\myref{fig:astEclipse}.
2716
e8173df5
EK
2717\begin{figure}[h]
2718 \centering
5e5908eb 2719 \begin{tikzpicture}[scale=0.8]
0f918507
EK
2720 \tikzset{level distance=50pt}
2721 \tikzset{sibling distance=5pt}
5e5908eb 2722 \tikzstyle{thescale}=[scale=0.8]
e8173df5 2723 \tikzset{every tree node/.style={align=center}}
5e5908eb
EK
2724 \tikzset{edge from parent/.append style={thick}}
2725 \tikzstyle{inode}=[rectangle,rounded corners,draw,fill=lightgray,drop
2726 shadow,align=center]
2727 \tikzset{every internal node/.style={inode}}
e8173df5
EK
2728 \tikzset{every leaf node/.style={draw=none,fill=none}}
2729
e601ce99
EK
2730 \Tree [.\type{CompilationUnit} [.\type{[ PackageDeclaration ]} [.\type{Name} ]
2731 [.\type{\{ Annotation \}*} ] ]
2732 [.\type{\{ ImportDeclaration \}*} [.\type{Name} ] ]
0f918507 2733 [.\type{\{ AbstractTypeDeclaration \}+} [.\node(site){\type{\{
e601ce99 2734 BodyDeclaration \}*}}; ] [.\type{SimpleName} ] ]
e8173df5 2735 ]
e601ce99 2736 \begin{scope}[shift={(0.5,-6)}]
5e5908eb 2737 \node[inode,thescale](root){\type{MethodDeclaration}};
e601ce99 2738 \node[inode,thescale](modifiers) at (4.5,-5){\type{\{ IExtendedModifier \}*}
5e5908eb 2739 \\ {\footnotesize (Of type \type{Modifier} or \type{Annotation})}};
e601ce99 2740 \node[inode,thescale](typeParameters) at (-6,-3.5){\type{\{ TypeParameter
5e5908eb 2741 \}*}};
fbeec228 2742 \node[inode,thescale](parameters) at (-5,-5){\type{\{
5e5908eb 2743 SingleVariableDeclaration \}*} \\ {\footnotesize (Parameters)}};
e601ce99 2744 \node[inode,thescale](exceptions) at (5,-3){\type{\{ Name \}*} \\
5e5908eb 2745 {\footnotesize (Exceptions)}};
e601ce99 2746 \node[inode,thescale](return) at (-6.5,-2){\type{Type} \\ {\footnotesize
5e5908eb 2747 (Return type)}};
e601ce99
EK
2748 \begin{scope}[shift={(0,-5)}]
2749 \Tree [.\node(body){\type{[ Block ]} \\ {\footnotesize (Body)}};
2750 [.\type{\{ Statement \}*} [.\type{\{ Expression \}*} ]
2751 [.\type{\{ Statement \}*} [.\type{\ldots} ]]
2752 ]
2753 ]
2754 \end{scope}
0f918507 2755 \end{scope}
e601ce99
EK
2756 \draw[->,>=triangle 90,shorten >=1pt](root.east)..controls +(east:2) and
2757 +(south:1)..(site.south);
0f918507 2758
5e5908eb
EK
2759 \draw (root.south) -- (modifiers);
2760 \draw (root.south) -- (typeParameters);
2761 \draw (root.south) -- ($ (parameters.north) + (2,0) $);
2762 \draw (root.south) -- (exceptions);
2763 \draw (root.south) -- (return);
2764 \draw (root.south) -- (body);
2765
e8173df5 2766 \end{tikzpicture}
fe0a4c48 2767 \caption{The format of the abstract syntax tree in \name{Eclipse}.}
e8173df5
EK
2768 \label{fig:astEclipse}
2769\end{figure}
94deee9e 2770\todoin{Add more to the AST format tree? \myref{fig:astEclipse}}
a2868580 2771
b8fce5af 2772\section{The ASTVisitor}\label{astVisitor}
3ab3e132
EK
2773So far, the only thing that has been addressed is how the data that is going to
2774be the basis for our analysis is structured. Another aspect of it is how we are
2775going to traverse the AST to gather the information we need, so we can conclude
2776about the properties we are analysing. It is of course possible to start at the
2777top of the tree, and manually search through its nodes for the ones we are
2778looking for, but that is a bit inconvenient. To be able to efficiently utilize
2779such an approach, we would need to make our own framework for traversing the
2780tree and visiting only the types of nodes we are after. Luckily, this
fe0a4c48 2781functionality is already provided in \name{Eclipse}, by its
50976f51
EK
2782\typewithref{org.eclipse.jdt.core.dom}{ASTVisitor}.
2783
fe0a4c48
EK
2784The \name{Eclipse} AST, together with its \type{ASTVisitor}, follows the
2785\pattern{Visitor} pattern\citing{designPatterns}. The intent of this design
2786pattern is to facilitate extending the functionality of classes without touching
2787the classes themselves.
0a8ca90c 2788
fe0a4c48
EK
2789Let us say that there is a class hierarchy of elements. These elements all have
2790a method \method{accept(Visitor visitor)}. In its simplest form, the
0a8ca90c
EK
2791\method{accept} method just calls the \method{visit} method of the visitor with
2792itself as an argument, like this: \code{visitor.visit(this)}. For the visitors
2793to be able to extend the functionality of all the classes in the elements
2794hierarchy, each \type{Visitor} must have one visit method for each concrete
2795class in the hierarchy. Say the hierarchy consists of the concrete classes
2796\type{ConcreteElementA} and \type{ConcreteElementB}. Then each visitor must have
2797the (possibly empty) methods \method{visit(ConcreteElementA element)} and
2798\method{visit(ConcreteElementB element)}. This scenario is depicted in
2799\myref{fig:visitorPattern}.
50976f51 2800
3572a8ac
EK
2801\begin{figure}[h]
2802 \centering
2803 \tikzstyle{abstract}=[rectangle, draw=black, fill=white, drop shadow, text
2804 centered, anchor=north, text=black, text width=6cm, every one node
2805part/.style={align=center, font=\bfseries\itshape}]
2806 \tikzstyle{concrete}=[rectangle, draw=black, fill=white, drop shadow, text
2807 centered, anchor=north, text=black, text width=6cm]
2808 \tikzstyle{inheritarrow}=[->, >=open triangle 90, thick]
2809 \tikzstyle{commentarrow}=[->, >=angle 90, dashed]
2810 \tikzstyle{line}=[-, thick]
2811 \tikzset{every one node part/.style={align=center, font=\bfseries}}
2812 \tikzset{every second node part/.style={align=center, font=\ttfamily}}
3572a8ac
EK
2813
2814 \begin{tikzpicture}[node distance=1cm, scale=0.8, every node/.style={transform
2815 shape}]
2816 \node (Element) [abstract, rectangle split, rectangle split parts=2]
2817 {
2818 \nodepart{one}{Element}
2819 \nodepart{second}{+accept(visitor: Visitor)}
2820 };
2821 \node (AuxNode01) [text width=0, minimum height=2cm, below=of Element] {};
2822 \node (ConcreteElementA) [concrete, rectangle split, rectangle split
2823 parts=2, left=of AuxNode01]
2824 {
2825 \nodepart{one}{ConcreteElementA}
2826 \nodepart{second}{+accept(visitor: Visitor)}
2827 };
2828 \node (ConcreteElementB) [concrete, rectangle split, rectangle split
2829 parts=2, right=of AuxNode01]
2830 {
2831 \nodepart{one}{ConcreteElementB}
2832 \nodepart{second}{+accept(visitor: Visitor)}
2833 };
2834
2835 \node[comment, below=of ConcreteElementA] (CommentA) {visitor.visit(this)};
2836
2837 \node[comment, below=of ConcreteElementB] (CommentB) {visitor.visit(this)};
2838
2839 \node (AuxNodeX) [text width=0, minimum height=1cm, below=of AuxNode01] {};
2840
2841 \node (Visitor) [abstract, rectangle split, rectangle split parts=2,
2842 below=of AuxNodeX]
2843 {
2844 \nodepart{one}{Visitor}
2845 \nodepart{second}{+visit(ConcreteElementA)\\+visit(ConcreteElementB)}
2846 };
2847 \node (AuxNode02) [text width=0, minimum height=2cm, below=of Visitor] {};
2848 \node (ConcreteVisitor1) [concrete, rectangle split, rectangle split
2849 parts=2, left=of AuxNode02]
2850 {
2851 \nodepart{one}{ConcreteVisitor1}
2852 \nodepart{second}{+visit(ConcreteElementA)\\+visit(ConcreteElementB)}
2853 };
2854 \node (ConcreteVisitor2) [concrete, rectangle split, rectangle split
2855 parts=2, right=of AuxNode02]
2856 {
2857 \nodepart{one}{ConcreteVisitor2}
2858 \nodepart{second}{+visit(ConcreteElementA)\\+visit(ConcreteElementB)}
2859 };
2860
2861
2862 \draw[inheritarrow] (ConcreteElementA.north) -- ++(0,0.7) -|
2863 (Element.south);
2864 \draw[line] (ConcreteElementA.north) -- ++(0,0.7) -|
2865 (ConcreteElementB.north);
2866
2867 \draw[inheritarrow] (ConcreteVisitor1.north) -- ++(0,0.7) -|
2868 (Visitor.south);
2869 \draw[line] (ConcreteVisitor1.north) -- ++(0,0.7) -|
2870 (ConcreteVisitor2.north);
2871
2872 \draw[commentarrow] (CommentA.north) -- (ConcreteElementA.south);
2873 \draw[commentarrow] (CommentB.north) -- (ConcreteElementB.south);
2874
2875
2876 \end{tikzpicture}
2877 \caption{The Visitor Pattern.}
2878 \label{fig:visitorPattern}
2879\end{figure}
2880
0a8ca90c
EK
2881The use of the visitor pattern can be appropriate when the hierarchy of elements
2882is mostly stable, but the family of operations over its elements is constantly
fe0a4c48 2883growing. This is clearly the case for the \name{Eclipse} AST, since the hierarchy of
0a8ca90c
EK
2884type \type{ASTNode} is very stable, but the functionality of its elements is
2885extended every time someone needs to operate on the AST. Another aspect of the
fe0a4c48 2886\name{Eclipse} implementation is that it is a public API, and the visitor pattern is an
0a8ca90c
EK
2887easy way to provide access to the nodes in the tree.
2888
fe0a4c48 2889The version of the visitor pattern implemented for the AST nodes in \name{Eclipse} also
0a8ca90c
EK
2890provides an elegant way to traverse the tree. It does so by following the
2891convention that every node in the tree first let the visitor visit itself,
b3adff95
EK
2892before it also makes all its children accept the visitor. The children are only
2893visited if the visit method of their parent returns \var{true}. This pattern
2894then makes for a prefix traversal of the AST. If postfix traversal is desired,
2895the visitors also has \method{endVisit} methods for each node type, that is
2896called after the \method{visit} method for a node. In addition to these visit
2897methods, there are also the methods \method{preVisit(ASTNode)},
2898\method{postVisit(ASTNode)} and \method{preVisit2(ASTNode)}. The
2899\method{preVisit} method is called before the type-specific \method{visit}
2900method. The \method{postVisit} method is called after the type-specific
2901\method{endVisit}. The type specific \method{visit} is only called if
2902\method{preVisit2} returns \var{true}. Overriding the \method{preVisit2} is also
2903altering the behavior of \method{preVisit}, since the default implementation is
94c59647
EK
2904responsible for calling it.
2905
2906An example of a trivial \type{ASTVisitor} is shown in
2907\myref{lst:astVisitorExample}.
2908
2909\begin{listing}
2910\begin{minted}{java}
2911public class CollectNamesVisitor extends ASTVisitor {
2912 Collection<Name> names = new LinkedList<Name>();
2913
2914 @Override
2915 public boolean visit(QualifiedName node) {
2916 names.add(node);
2917 return false;
2918 }
2919
2920 @Override
2921 public boolean visit(SimpleName node) {
2922 names.add(node);
2923 return true;
2924 }
2925}
2926\end{minted}
2927\caption{An \type{ASTVisitor} that visits all the names in a subtree and adds
2928them to a collection, except those names that are children of any
2929\type{QualifiedName}.}
2930\label{lst:astVisitorExample}
2931\end{listing}
2932
b8fce5af
EK
2933\section{Property collectors}\label{propertyCollectors}
2934The prefixes and unfixes are found by property
2935collectors\typeref{no.uio.ifi.refaktor.extractors.collectors.PropertyCollector}.
2936A property collector is of the \type{ASTVisitor} type, and thus visits nodes of
2937type \type{ASTNode} of the abstract syntax tree \see{astVisitor}.
2938
2939\subsection{The PrefixesCollector}
2940The \typewithref{no.uio.ifi.refaktor.extractors.collectors}{PrefixesCollector}
ccd252c5 2941finds prefixes that makes up the basis for calculating move targets for the
fe0a4c48 2942\refa{Extract and Move Method} refactoring. It visits expression
b8fce5af
EK
2943statements\typeref{org.eclipse.jdt.core.dom.ExpressionStatement} and creates
2944prefixes from its expressions in the case of method invocations. The prefixes
2945found is registered with a prefix set, together with all its sub-prefixes.
2946
2947\subsection{The UnfixesCollector}\label{unfixes}
2948The \typewithref{no.uio.ifi.refaktor.extractors.collectors}{UnfixesCollector}
47c0bea8 2949finds unfixes within a selection.
b8fce5af 2950
b8fce5af 2951
0a8ca90c 2952
5195bf0c
EK
2953\subsection{The ContainsReturnStatementCollector}
2954The
2955\typewithref{no.uio.ifi.refaktor.analyze.collectors}{ContainsReturnStatementCollector}
2956is a very simple property collector. It only visits the return statements within
2957a selection, and can report whether it encountered a return statement or not.
2958
b8d069e4
EK
2959\subsection{The LastStatementCollector}
2960The \typewithref{no.uio.ifi.refaktor.analyze.collectors}{LastStatementCollector}
2961collects the last statement of a selection. It does so by only visiting the top
2962level statements of the selection, and compares the textual end offset of each
3ab3e132 2963encountered statement with the end offset of the previous statement found.
b8d069e4 2964
95c0f364 2965\section{Checkers}\label{checkers}
801ff00a 2966\todoin{Check out ExtractMethodAnalyzer from ExtractMethodRefactoring}
d6f8e65a
EK
2967The checkers are a range of classes that checks that text selections complies
2968with certain criteria. All checkers operates under the assumption that the code
2969they check is free from compilation errors. If a
95c0f364
EK
2970\typewithref{no.uio.ifi.refaktor.analyze.analyzers}{Checker} fails, it throws a
2971\type{CheckerException}. The checkers are managed by the
2972\type{LegalStatementsChecker}, which does not, in fact, implement the
2973\type{Checker} interface. It does, however, run all the checkers registered with
2974it, and reports that all statements are considered legal if no
08cbba3b 2975\type{CheckerException} is thrown. Many of the checkers either extends the
f72f72f1 2976\type{PropertyCollector} or utilizes one or more property collectors to verify
3ab3e132 2977some criteria. The checkers registered with the \type{LegalStatementsChecker}
f72f72f1 2978are described next. They are run in the order presented below.
95c0f364 2979
a22915d0
EK
2980\subsection{The CallToProtectedOrPackagePrivateMethodChecker}
2981This checker is designed to prevent an error that can occur in situations where
2982a method is declared in one class, but overridden in another. If a text
2983selection contains a call to a method like this, and the seletion is extracted
2984to a new method, the subsequent movement of this method could cause the code to
1c521a77 2985break.
a22915d0
EK
2986
2987The code breaks in situations where the method call in the selection is to a
2988method that has the \code{protected} modifier, or it does not have any access
1c521a77 2989modifiers, i.e. it is package-private.
a22915d0
EK
2990
2991The workings of the \type{CallToProtectedOrPackagePrivateMethod\-Checker} is
2992therefore very simple. It looks for calls to methods that are either protected
2993or package-private within the selection, and throws an
2994\type{IllegalExpressionFoundException} if one is found.
2995
2996The problem this checker helps to avoid, is a little subtle. The problem does
2997not arise in the class where the change is done, but in a class derived from it.
2998This shows that classes acting as superclasses are especially fragile to
2999introducing errors in the context of automated refactoring. This is also shown
3000in bug\ldots \todoin{File Eclipse bug report}
3001
2a4b8dea 3002\subsection{The InstantiationOfNonStaticInnerClassChecker}
1c521a77 3003\todoin{Contains duplicate text from semantic section}
2a4b8dea
EK
3004When a non-static inner class is instatiated, this must happen in the scope of
3005its declaring class. This is because it must have access to the members of the
3006declaring class. If the inner class is public, it is possible to instantiate it
3007through an instance of its declaring class, but this is not handled by the
3008\type{MoveInstanceMethodProcessor} in Eclipse when moving a method. Therefore,
3009performing a move on a method that instantiates a non-static inner class, will
3010break the code if the instantiation is not handled properly. For this reason,
3011the \type{InstantiationOfNonStaticInnerClassChecker} does not validate
3012selections that contains instantiations of non-static inner classes. This
3013problem is also related to bug\ldots \todoin{File Eclipse bug report}
3014
8d0caf4c 3015\subsection{The EnclosingInstanceReferenceChecker}
1c521a77
EK
3016\todoin{Contains duplicate text from semantic section}
3017The purpose of this checker is to verify that the names in a selection are not
8d0caf4c
EK
3018referencing any enclosing instances. This is for making sure that all references
3019is legal in a method that is to be moved. Theoretically, some situations could
3020be easily solved my passing a reference to the referenced class with the moved
3021method (e.g. when calling public methods), but the dependency on the
3022\type{MoveInstanceMethodProcessor} prevents this.
3023
3024The
3025\typewithref{no.uio.ifi.refaktor.analyze.analyzers}{EnclosingInstanceReferenceChecker}
3026is a modified version of the
801ff00a 3027\typewithref{org.eclipse.jdt.internal.corext.refactoring.structure.MoveInstanceMethod\-Processor}{EnclosingInstanceReferenceFinder}
8d0caf4c
EK
3028from the \type{MoveInstanceMethodProcessor}. Wherever the
3029\type{EnclosingInstanceReferenceFinder} would create a fatal error status, the
3030checker throws a \type{CheckerException}.
3031
3032It works by first finding all of the enclosing types of a selection. Thereafter
3033it visits all its simple names to check that they are not references to
3034variables or methods declared in any of the enclosing types. In addition the
1c521a77 3035checker visits \var{this}-expressions to verify that no such expressions are
8d0caf4c
EK
3036qualified with any name.
3037
9cc2cd59 3038\subsection{The ReturnStatementsChecker}\label{returnStatementsChecker}
15327961 3039\todoin{Contains duplicate text from semantic section}
801ff00a 3040The checker for return statements is meant to verify that if a text selection
d6f8e65a
EK
3041contains a return statement, then every possible execution path within the
3042selection ends in a return statement. This property is important regarding the
3043\ExtractMethod refactoring. If it holds, it means that a method could be
3044extracted from the selection, and a call to it could be substituted for the
801ff00a
EK
3045selection. If the method has a non-void return type, then a call to it would
3046also be a valid return point for the calling method. If its return value is of
3047the void type, then the \type{ExtractMethodRefactoring} of \name{Eclipse}
3048appends an empty return statement to the back of the method call. Therefore, the
3049analysis does not discriminate on either kinds of return statements, with or
3050without a return value.
3051
3052The property description implies that if the selection is free from return
d6f8e65a
EK
3053statements, then the checker validates. So this is the first thing the checker
3054investigates.
3055
3056If the checker proceedes any further, it is because the selection contains one
3057or more return statements. The next test is therefore to check if the last
801ff00a
EK
3058statement of the selection ends in either a return or a throw statement. If the
3059last statement of the selection ends in a return statement, then all execution
d6f8e65a
EK
3060paths within the selection should end in either this, or another, return
3061statement. This is also true for a throw statement, since it causes an immediate
3062exit from the current block, together with all outer blocks in its control flow
3063that does not catch the thrown exception.
3064
3065Return statements can be either explicit or implicit. An \emph{explicit} return
3066statement is formed by using the \code{return} keyword, while an \emph{implicit}
3067return statement is a statement that is not formed by the \code{return} keyword,
801ff00a
EK
3068but must be the last statement of a method that can have any side effects. This
3069can happen in methods with a void return type. An example is a statement that is
3070inside one or more blocks. The last statement of a method could for instance be
3071an if-statement, but the last statement that is executed in the method, and that
3072can have any side effects, may be located inside the block of the else part of
3073the if-statement.
3074
3075The responsibility for checking that the last statement of the selection
3076eventually ends in a return or throw statement, is put on the
3077\type{LastStatementOfSelectionEndsInReturnOrThrowChecker}. For every node
3078visited, if it is a statement, it does a test to see if the statement is a
3079return, a throw or if it is an implicit return statement. If this is the case,
3080no further checking is done. This checking is done in the \code{preVisit2}
3081method \see{astVisitor}. If the node is not of a type that is being handled by
3082its type specific visit method, the checker performs a simple test. If the node
3083being visited is not the last statement of its parent that is also enclosed by
3084the selection, an \type{IllegalStatementFoundException} is thrown. This ensures
3085that all statements are taken care of, one way or the other. It also ensures
3086that the checker is conservative in the way it checks for legality of the
3087selection.
3088
3089To examine if a statement is an implicit return statement, the checker first
3090finds the last statement declared in its enclosing method. If this statement is
3091the same as the one under investigation, it is considered an implicit return
3092statement. If the statements are not the same, the checker does a search to see
3093if statement examined is also the last statement of the method that can be
3094reached. This includes the last statement of a block statement, a labeled
3095statement, a synchronized statement or a try statement, that in turn is the last
3096statement enclosed by the statement types listed. This search goes through all
3097the parents of a statement until a statement is found that is not one of the
3098mentioned acceptable parent statements. If the search ends in a method
3099declaration, then the statement is considered to be the last reachable statement
3100of the method, and thus also an implicit return statement.
3101
3102There are two kinds of statements that are handled explicitly. It is
3103if-statements and try-statements. Block, labeled and do-statements are handled
3104by fall-through to the other two. Do-statements are considered equal to blocks
3105in this context, since their bodies are always evaluated at least one time. If-
3106and try-statements are visited only if they are the last node of their parent
3107within the selection.
3108
3109For if-statements, the rule is that if the then-part does not contain any return
3110or throw statements, it is considered illegal. If it does contain a return or
3111throw, its else-part is checked. If the else-part is non-existent, or it does
3112not contain any return or throw statements, it is considered illegal. If the
3113statement is not regarded illegal, its children are visited.
3114
3115Try-statements are handled much the same way as if-statements. Its body must
3116contain a return or throw. The same applies to its catch clauses and finally
3117body.
3118
3119If the checker does not complain at any point, the selection is considered valid
3120with respect to return statements.
41cde50e
EK
3121
3122\subsection{The AmbiguousReturnValueChecker}
11d9e6e1
EK
3123\todoin{revisit}
3124This checker verifies that there are no \emph{ambiguous return values} in a
3125selection.
9cc2cd59
EK
3126
3127First the checker needs to collect some data. Those data are the binding keys
3128for all simple names that are assigned to within the selection, including
3129variable declarations, but excluding fields. The checker also collects whether
3130there exists a return statement in the selection or not. No further checks of
3131return statements are needed, since, at this point, the selection is already
3132checked for illegal return statements \see{returnStatementsChecker}.
3133
3134After the binding keys of the assignees are collected, the checker searches the
3135part of the enclosing method that is after the selection for references whose
3ab3e132
EK
3136binding keys are among the collected keys. If more than one unique referral is
3137found, or only one referral is found, but the selection also contains a return
3138statement, we have a situation with an ambiguous return value, and an exception
3139is thrown.
9cc2cd59
EK
3140
3141%\todoin{Explain why we do not need to consider variables assigned inside
3142%local/anonymous classes. (The referenced variables need to be final and so
3143%on\ldots)}
41cde50e
EK
3144
3145\subsection{The IllegalStatementsChecker}
83f12332 3146\todoin{Contains duplicate text from semantic section}
41cde50e
EK
3147This checker is designed to check for illegal statements.
3148
08cbba3b
EK
3149Any use of the \var{super} keyword is prohibited, since its meaning is altered
3150when moving a method to another class.
3151
3152For a \emph{break} statement, there is two situations to consider: A break
3153statement with or without a label. If the break statement has a label, it is
3154checked that whole of the labeled statement is inside the selection. Since a
3155label does not have any binding information, we have to search upwards in the
3156AST to find the \type{LabeledStatement} that corresponds to the label from the
3157break statement, and check that it is contained in the selection. If the break
3158statement does not have a label attached to it, it is checked that its innermost
3159enclosing loop or switch statement also is inside the selection.
3160
3161The situation for a \emph{continue} statement is the same as for a break
3162statement, except that it is not allowed inside switch statements.
3163
3164Regarding \emph{assignments}, two types of assignments is allowed: Assignment to
3165a non-final variable and assignment to an array access. All other assignments is
3166regarded illegal.
3167
3168\todoin{Finish\ldots}
41cde50e 3169
356782a0
EK
3170
3171\chapter{Benchmarking}
3172\todoin{Better name than ``benchmarking''?}
60065669 3173This part of the master's project is located in the \name{Eclipse} project
356782a0
EK
3174\code{no.uio.ifi.refaktor.benchmark}. The purpose of it is to run the equivalent
3175of the \type{SearchBasedExtractAndMoveMethodChanger}
3176\see{searchBasedExtractAndMoveMethodChanger} over a larger software project,
3ab3e132 3177both to test its robustness but also its effect on different software metrics.
356782a0
EK
3178
3179\section{The benchmark setup}
fe0a4c48
EK
3180The benchmark itself is set up as a \name{JUnit} test case. This is a convenient
3181setup, and utilizes the \name{JUnit Plugin Test Launcher}. This provides us a
3182with a fully functional \name{Eclipse} workbench. Most importantly, this gives
3183us access to the Java Model of \name{Eclipse} \see{javaModel}.
356782a0
EK
3184
3185\subsection{The ProjectImporter}
3186The Java project that is going to be used as the data for the benchmark, must be
3187imported into the JUnit workspace. This is done by the
3188\typewithref{no.uio.ifi.refaktor.benchmark}{ProjectImporter}. The importer
3189require the absolute path to the project description file. It is named
3190\code{.project} and is located at the root of the project directory.
3191
3192The project description is loaded to find the name of the project to be
3193imported. The project that shall be the destination for the import is created in
3194the workspace, on the base of the name from the description. Then an import
3195operation is created, based on both the source and destination information. The
3196import operation is run to perform the import.
3197
3198I have found no simple API call to accomplish what the importer does, which
fe0a4c48
EK
3199tells me that it may not be too many people performing this particular action.
3200The solution to the problem was found on \name{Stack
356782a0 3201Overflow}\footnote{\url{https://stackoverflow.com/questions/12401297}}. It
3ab3e132 3202contains enough dirty details to be considered inconvenient to use, if not
356782a0
EK
3203wrapping it in a class like my \type{ProjectImporter}. One would probably have
3204to delve into the source code for the import wizard to find out how the import
3205operation works, if no one had already done it.
3206
8fe94c0b
EK
3207\section{Statistics}
3208Statistics for the analysis and changes is captured by the
3209\typewithref{no.uio.ifi.refaktor.aspects}{StatisticsAspect}. This an
fe0a4c48 3210\emph{aspect} written in \name{AspectJ}.
8fe94c0b
EK
3211
3212\subsection{AspectJ}
fe0a4c48 3213\name{AspectJ}\footnote{\url{http://eclipse.org/aspectj/}} is an extension to
8fe94c0b
EK
3214the Java language, and facilitates combining aspect-oriented programming with
3215the object-oriented programming in Java.
3216
3217Aspect-oriented programming is a programming paradigm that is meant to isolate
3218so-called \emph{cross-cutting concerns} into their own modules. These
dd73ba39
EK
3219cross-cutting concerns are functionalities that spans over multiple classes, but
3220may not belong naturally in any of them. It can be functionality that does not
3221concern the business logic of an application, and thus may be a burden when
3222entangled with parts of the source code it does not really belong. Examples
3223include logging, debugging, optimization and security.
8fe94c0b 3224
416b6888
EK
3225Aspects are interacting with other modules by defining advices. The concept of
3226an \emph{advice} is known from both aspect-oriented and functional
3227programming\citing{wikiAdvice2014}. It is a function that modifies another
3228function when the latter is run. An advice in AspectJ is somewhat similar to a
3229method in Java. It is meant to alter the behavior of other methods, and contains
3230a body that is executed when it is applied.
8fe94c0b
EK
3231
3232An advice can be applied at a defined \emph{pointcut}. A pointcut picks out one
3233or more \emph{join points}. A join point is a well-defined point in the
3234execution of a program. It can occur when calling a method defined for a
3235particular class, when calling all methods with the same name,
3236accessing/assigning to a particular field of a given class and so on. An advice
dd73ba39
EK
3237can be declared to run both before, after returning from a pointcut, when there
3238is thrown an exception in the pointcut or after the pointcut either returns or
3239throws an exception. In addition to picking out join points, a pointcut can
3240also bind variables from its context, so they can be accessed in the body of an
3241advice. An example of a pointcut and an advice is found in
3242\myref{lst:aspectjExample}.
8fe94c0b
EK
3243
3244\begin{listing}[h]
c8088eec 3245\begin{minted}{aspectj}
8fe94c0b
EK
3246pointcut methodAnalyze(
3247 SearchBasedExtractAndMoveMethodAnalyzer analyzer) :
3248 call(* SearchBasedExtractAndMoveMethodAnalyzer.analyze())
3249 && target(analyzer);
3250
3251after(SearchBasedExtractAndMoveMethodAnalyzer analyzer) :
3252 methodAnalyze(analyzer) {
3253 statistics.methodCount++;
3254 debugPrintMethodAnalysisProgress(analyzer.method);
3255}
3256\end{minted}
3257\caption{An example of a pointcut named \method{methodAnalyze},
3258and an advice defined to be applied after it has occurred.}
3259\label{lst:aspectjExample}
3260\end{listing}
3261
dd73ba39
EK
3262\subsection{The Statistics class}
3263The statistics aspect stores statistical information in an object of type
3264\type{Statistics}. As of now, the aspect needs to be initialized at the point in
3265time where it is desired that it starts its data gathering. At any point in time
3266the statistics aspect can be queried for a snapshot of the current statistics.
3267
3268The \type{Statistics} class also include functionality for generating a report
3269of its gathered statistics. The report can be given either as a string or it can
3270be written to a file.
3271
3272\subsection{Advices}
3273The statistics aspect contains advices for gathering statistical data from
3274different parts of the benchmarking process. It captures statistics from both
3275the analysis part and the execution part of the composite \ExtractAndMoveMethod
3276refactoring.
3277
3278For the analysis part, there are advices to count the number of text selections
3279analyzed and the number of methods, types, compilation units and packages
3280analyzed. There are also advices that counts for how many of the methods there
3281is found a selection that is a candidate for the refactoring, and for how many
3ab3e132 3282methods there is not.
dd73ba39
EK
3283
3284There exists advices for counting both the successful and unsuccessful
3285executions of all the refactorings. Both for the \ExtractMethod and \MoveMethod
3286refactorings in isolation, as well as for the combination of them.
3287
8fe94c0b 3288\section{Optimizations}
41293210 3289When looking for optimizations to make for the benchmarking process, I used the
60065669
EK
3290\name{VisualVM}\footnote{\url{http://visualvm.java.net/}} \gloss{profiler} for
3291the Java Virtual Machine to both profile the application and also to make memory
3292dumps of its heap.
8fe94c0b
EK
3293
3294\subsection{Caching}
60065669
EK
3295When \gloss{profiling} the benchmark process before making any optimizations, it
3296early became apparent that the parsing of source code was a place to direct
3297attention towards. This discovery was done when only \emph{analyzing} source
3298code, before trying to do any \emph{manipulation} of it. Caching of the parsed
3299ASTs seemed like the best way to save some time, as expected. With only a simple
3300cache of the most recently used AST, the analysis time was speeded up by a
3301factor of around 20. This number depends a little upon which type of system the
3302analysis is run.
41293210
EK
3303
3304The caching is managed by a cache manager, that now, by default, utilizes the
3305not so well known feature of Java called a \emph{soft reference}. Soft
3306references are best explained in the context of weak references. A \emph{weak
3307reference} is a reference to an object instance that is only guaranteed to
3308persist as long as there is a \emph{strong reference} or a soft reference
3309referring the same object. If no such reference is found, its referred object is
3310garbage collected. A strong reference is basically the same as a regular Java
3311reference. A soft reference has the same guarantees as a week reference when it
3312comes to its relation to strong references, but it is not necessarily garbage
3313collected whenever there exists no strong references to it. A soft reference
3314\emph{may} reside in memory as long as the JVM has enough free memory in the
3315heap. A soft reference will therefore usually perform better than a weak
3316reference when used for simple caching and similar tasks. The way to use a
3317soft/weak reference is to as it for its referent. The return value then has to
3318be tested to check that it is not \var{null}. For the basic usage of soft
3319references, see \myref{lst:softReferenceExample}. For a more thorough
3320explanation of weak references in general, see\citing{weakRef2006}.
3321
3322\begin{listing}[h]
3323\begin{minted}{java}
3324// Strong reference
3325Object strongRef = new Object();
3326
3327// Soft reference
3328SoftReference<Object> softRef =
3329 new SoftReference<Object>(new Object());
3330
3331// Using the soft reference
3332Object obj = softRef.get();
3333if (obj != null) {
3334 // Use object here
3335}
3336\end{minted}
3337\caption{Showing the basic usage of soft references. Weak references is used the
3338 same way. {\footnotesize (The references are part of the \code{java.lang.ref}
3339package.)}}
3340\label{lst:softReferenceExample}
3341\end{listing}
3342
3343The cache based on soft references has no limit for how many ASTs it caches. It
3344is generally not advisable to keep references to ASTs for prolonged periods of
3345time, since they are expensive structures to hold on to. For regular plugin
fe0a4c48 3346development, \name{Eclipse} recommends not creating more than one AST at a time to
41293210
EK
3347limit memory consumption. Since the benchmarking has nothing to do with user
3348experience, and throughput is everything, these advices are intentionally
fe0a4c48 3349ignored. This means that during the benchmarking process, the target \name{Eclipse}
41293210
EK
3350application may very well work close to its memory limit for the heap space for
3351long periods during the benchmark.
8fe94c0b
EK
3352
3353\subsection{Memento}
d6f8e65a 3354\todoin{Write}
356782a0 3355
a7514fbd
EK
3356
3357\chapter{Technicalities}
3358
3359\section{Source code organization}
60065669 3360All the parts of this master's project is under version control with
a7514fbd
EK
3361\name{Git}\footnote{\url{http://git-scm.com/}}.
3362
3363The software written is organized as some \name{Eclipse} plugins. Writing a plugin is
3364the natural way to utilize the API of \name{Eclipse}. This also makes it possible to
3365provide a user interface to manually run operations on selections in program
3366source code or whole projects/packages.
3367
3368When writing a plugin in \name{Eclipse}, one has access to resources such as the
3369current workspace, the open editor and the current selection.
3370
3371The thesis work is contained in the following Eclipse projects:
3372
3373\begin{description}
3374 \item[no.uio.ifi.refaktor] \hfill \\ This is the main Eclipse plugin
3375 project, and contains all of the business logic for the plugin.
3376
3377 \item[no.uio.ifi.refaktor.tests] \hfill \\
3378 This project contains the tests for the main plugin.
3379
3380 \item[no.uio.ifi.refaktor.examples] \hfill \\
3381 Contains example code used in testing. It also contains code for managing
3382 this example code, such as creating an Eclipse project from it before a test
3383 run.
3384
3385 \item[no.uio.ifi.refaktor.benchmark] \hfill \\
3386 This project contains code for running search based versions of the
3387 composite refactoring over selected Eclipse projects.
3388
3389 \item[no.uio.ifi.refaktor.releng] \hfill \\
3390 Contains the rmap, queries and target definitions needed by by Buckminster
3391 on the Jenkins continuous integration server.
3392
3393\end{description}
3394
3395\subsection{The no.uio.ifi.refaktor project}
3396
3397\subsubsection{no.uio.ifi.refaktor.analyze}
3398This package, and its subpackages, contains code that is used for analyzing Java
3399source code. The most important subpackages are presented below.
3400
3401\begin{description}
3402 \item[no.uio.ifi.refaktor.analyze.analyzers] \hfill \\
3403 This package contains source code analyzers. These are usually responsible
3404 for analyzing text selections or running specialized analyzers for different
3405 kinds of entities. Their structure are often hierarchical. This means that
3406 you have an analyzer for text selections, that in turn is utilized by an
3407 analyzer that analyzes all the selections of a method. Then there are
3408 analyzers for analyzing all the methods of a type, all the types of a
3409 compilation unit, all the compilation units of a package, and, at last, all
3410 of the packages in a project.
3411
3412 \item[no.uio.ifi.refaktor.analyze.checkers] \hfill \\
3413 A package containing checkers. The checkers are classes used to validate
3414 that a selection can be further analyzed and chosen as a candidate for a
3415 refactoring. Invalidating properties can be such as usage of inner classes
3416 or the need for multiple return values.
3417
3418 \item[no.uio.ifi.refaktor.analyze.collectors] \hfill \\
3419 This package contains the property collectors. Collectors are used to gather
3420 properties from a text selection. This is mostly properties regarding
3421 referenced names and their occurrences. It is these properties that makes up
3422 the basis for finding the best candidates for a refactoring.
3423\end{description}
3424
3425\subsubsection{no.uio.ifi.refaktor.change}
3426This package, and its subpackages, contains functionality for manipulate source
3427code.
3428
3429\begin{description}
3430 \item[no.uio.ifi.refaktor.change.changers] \hfill \\
3431 This package contains source code changers. They are used to glue together
3432 the analysis of source code and the actual execution of the changes.
3433
3434 \item[no.uio.ifi.refaktor.change.executors] \hfill \\
3435 The executors that are responsible for making concrete changes are found in
3436 this package. They are mostly used to create and execute one or more Eclipse
3437 refactorings.
3438
3439 \item[no.uio.ifi.refaktor.change.processors] \hfill \\
3440 Contains a refactoring processor for the \MoveMethod refactoring. The code
3441 is stolen and modified to fix a bug. The related bug is described in
3442 \myref{eclipse_bug_429416}.
3443
3444\end{description}
3445
3446\subsubsection{no.uio.ifi.refaktor.handlers}
3447This package contains handlers for the commands defined in the plugin manifest.
3448
3449\subsubsection{no.uio.ifi.refaktor.prefix}
3450This package contains the \type{Prefix} type that is the data representation of
3451the prefixes found by the \type{PrefixesCollector}. It also contains the prefix
3452set for storing and working with prefixes.
3453
3454\subsubsection{no.uio.ifi.refaktor.statistics}
3455The package contains statistics functionality. Its heart is the statistics
3456aspect that is responsible for gathering statistics during the execution of the
3457\ExtractAndMoveMethod refactoring.
3458
3459\begin{description}
3460 \item[no.uio.ifi.refaktor.statistics.reports] \hfill \\
3461 This package contains a simple framework for generating reports from the
3462 statistics data generated by the aspect. Currently, the only available
3463 report type is a simple text report.
3464
3465\end{description}
3466
3467
3468\subsubsection{no.uio.ifi.refaktor.textselection}
3469This package contains the two custom text selections that are used extensively
3470throughout the project. One of them is just a subclass of the other, to support
3471the use of the memento pattern to optimize the memory usage during benchmarking.
3472
3473\subsubsection{no.uio.ifi.refaktor.debugging}
3474The package contains a debug utility class. I addition to this, the package
3475\code{no.uio.ifi.refaktor.utils.aspects} contains a couple of aspects used for
3476debugging purposes.
3477
3478\subsubsection{no.uio.ifi.refaktor.utils}
3479Utility package that contains all the functionality that has to do with parsing
3480of source code. It also has utility classes for looking up handles to methods
3481and types et cetera.
3482
3483\begin{description}
3484 \item[no.uio.ifi.refaktor.utils.caching] \hfill \\
3485 This package contains the caching manager for compilation units, along with
3486 classes for different caching strategies.
3487
3488 \item[no.uio.ifi.refaktor.utils.nullobjects] \hfill \\
3489 Contains classes for creating different null objects. Most of the classes is
3490 used to represent null objects of different handle types. These null objects
3491 are returned from various utility classes instead of returning a \var{null}
3492 value when other values are not available.
3493
3494\end{description}
3495
3496\section{Continuous integration}
3497The continuous integration server
3498\name{Jenkins}\footnote{\url{http://jenkins-ci.org/}} has been set up for the
3499project\footnote{A work mostly done by the supervisor.}. It is used as a way to
3500run tests and perform code coverage analysis.
3501
3502To be able to build the \name{Eclipse} plugins and run tests for them with Jenkins, the
3503component assembly project
3504\name{Buckminster}\footnote{\url{http://www.eclipse.org/buckminster/}} is used,
3505through its plugin for Jenkins. Buckminster provides for a way to specify the
3506resources needed for building a project and where and how to find them.
3507Buckminster also handles the setup of a target environment to run the tests in.
3508All this is needed because the code to build depends on an \name{Eclipse}
3509installation with various plugins.
3510
3511\subsection{Problems with AspectJ}
3512The Buckminster build worked fine until introducing AspectJ into the project.
3513When building projects using AspectJ, there are some additional steps that needs
3514to be performed. First of all, the aspects themselves must be compiled. Then the
3515aspects needs to be woven with the classes they affect. This demands a process
3516that does multiple passes over the source code.
3517
3518When using AspectJ with \name{Eclipse}, the specialized compilation and the
3519weaving can be handled by the \name{AspectJ Development
3520Tools}\footnote{\url{https://www.eclipse.org/ajdt/}}. This works all fine, but
3521it complicates things when trying to build a project depending on \name{Eclipse}
3522plugins outside of \name{Eclipse}. There is supposed to be a way to specify a
3523compiler adapter for javac, together with the file extensions for the file types
3524it shall operate. The AspectJ compiler adapter is called
3525\typewithref{org.aspectj.tools.ant.taskdefs}{Ajc11CompilerAdapter}, and it works
3526with files that has the extensions \code{*.java} and \code{*.aj}. I tried to
3527setup this in the build properties file for the project containing the aspects,
3528but to no avail. The project containing the aspects does not seem to be built at
3529all, and the projects that depends on it complains that they cannot find certain
3530classes.
3531
3532I then managed to write an \name{Ant}\footnote{\url{https://ant.apache.org/}}
3533build file that utilizes the AspectJ compiler adapter, for the
3534\code{no.uio.ifi.refaktor} plugin. The problem was then that it could no longer
3535take advantage of the environment set up by Buckminster. The solution to this
3536particular problem was of a ``hacky'' nature. It involves exporting the plugin
3537dependencies for the project to an Ant build file, and copy the exported path
3538into the existing build script. But then the Ant script needs to know where the
3539local \name{Eclipse} installation is located. This is no problem when building
3540on a local machine, but to utilize the setup done by Buckminster is a problem
3541still unsolved. To get the classpath for the build setup correctly, and here
3542comes the most ``hacky'' part of the solution, the Ant script has a target for
3543copying the classpath elements into a directory relative to the project
3544directory and checking it into Git. When no \code{ECLIPSE\_HOME} property is set
3545while running Ant, the script uses the copied plugins instead of the ones
3546provided by the \name{Eclipse} installation when building the project. This
3547obviously creates some problems with maintaining the list of dependencies in the
3548Ant file, as well as remembering to copy the plugins every time the list of
3549dependencies change.
3550
3551The Ant script described above is run by Jenkins before the Buckminster setup
3552and build. When setup like this, the Buckminster build succeeds for the projects
3553not using AspectJ, and the tests are run as normal. This is all good, but it
3554feels a little scary, since the reason for Buckminster not working with AspectJ
3555is still unknown.
3556
3557The problems with building with AspectJ on the Jenkins server lasted for a
3558while, before they were solved. This is reflected in the ``Test Result Trend''
3559and ``Code Coverage Trend'' reported by Jenkins.
3560
3561
e1d6ae87
EK
3562\chapter{Methodology}
3563
3564\section{Evolutionary design}
3565In the programming work for this project, it have tried to use a design strategy
4928aa0b
EK
3566called evolutionary design, also known as continuous or incremental
3567design\citing{wiki_continuous_2014}. It is a software design strategy
3568advocated by the Extreme Programming community. The essence of the strategy is
3569that you should let the design of your program evolve naturally as your
3570requirements change. This is seen in contrast with up-front design, where
3571design decisions are made early in the process.
e1d6ae87
EK
3572
3573The motivation behind evolutionary design is to keep the design of software as
3574simple as possible. This means not introducing unneeded functionality into a
3575program. You should defer introducing flexibility into your software, until it
3576is needed to be able to add functionality in a clean way.
3577
3578Holding up design decisions, implies that the time will eventually come when
3579decisions have to be made. The flexibility of the design then relies on the
3580programmer's abilities to perform the necessary refactoring, and \his confidence
3581in those abilities. From my experience working on this project, I can say that
3582this confidence is greatly enhanced by having automated tests to rely on
3583\see{tdd}.
3584
3585The choice of going for evolutionary design developed naturally. As Fowler
3586points out in his article \tit{Is Design Dead?}, evolutionary design much
3587resembles the ``code and fix'' development strategy\citing{fowler_design_2004}.
3588A strategy that most of us have practiced in school. This was also the case when
3589I first started this work. I had to learn the inner workings of Eclipse and its
3590refactoring-related plugins. That meant a lot of fumbling around with code I did
3591not know, in a trial and error fashion. Eventually I started writing tests for
3592my code, and my design began to evolve.
3593
4928aa0b
EK
3594\section{Test-driven development}\label{tdd}
3595As mentioned before, the project started out as a classic code and fix
3596developmen process. My focus was aimed at getting something to work, rather than
3597doing so according to best practice. This resulted in a project that got out of
3598its starting blocks, but it was not accompanied by any tests. Hence it was soon
3599difficult to make any code changes with the confidence that the program was
3600still correct afterwards (assuming it was so before changing it). I always knew
3601that I had to introduce some tests at one point, but this experience accelerated
3602the process of leading me onto the path of testing.
3603
3604I then wrote tests for the core functionality of the plugin, and thus gained
3605more confidence in the correctness of my code. I could now perform quite drastic
3606changes without ``wetting my pants``. After this, nearly all of the semantic
3607changes done to the business logic of the project, or the addition of new
3608functionality, was made in a test-driven manner. This means that before
3609performing any changes, I would define the desired functionality through a set
3610of tests. I would then run the tests to check that they were run and that they
3611did not pass. Then I would do any code changes necessary to make the tests
3612pass. The definition of how the program is supposed to operate is then captured
3613by the tests. However, this does not prove the correctness of the analysis
3614leading to the test definitions.
3615
3616\section{Continuous integration}
3617\todoin{???}
3618
3619
3727b75b 3620\chapter{Eclipse Bugs Found}
540ca7e4
EK
3621\newcommand{\submittedBugReport}[1]{The submitted bug report can be found on
3622 \url{#1}.}
94bb49f0
EK
3623
3624\section{Eclipse bug 420726: Code is broken when moving a method that is
0f6e45f8
EK
3625assigning to the parameter that is also the move
3626destination}\label{eclipse_bug_420726}
540ca7e4 3627This bug
94bb49f0 3628was found when analyzing what kinds of names that was to be considered as
3727b75b 3629\emph{unfixes} \see{unfixes}.
94bb49f0
EK
3630
3631\subsection{The bug}
3632The bug emerges when trying to move a method from one class to another, and when
3633the target for the move (must be a variable, local or field) is both a parameter
fe0a4c48 3634variable and also is assigned to within the method body. \name{Eclipse} allows this to
94bb49f0
EK
3635happen, although it is the sure path to a compilation error. This is because we
3636would then have an assignment to a \var{this} expression, which is not allowed
540ca7e4
EK
3637in Java.
3638\submittedBugReport{https://bugs.eclipse.org/bugs/show\_bug.cgi?id=420726}
94bb49f0
EK
3639
3640\subsection{The solution}
3641The solution to this problem is to add all simple names that are assigned to in
3642a method body to the set of unfixes.
128adb4f 3643
f1b6174d
EK
3644\section{Eclipse bug 429416: IAE when moving method from anonymous
3645class}\label{eclipse_bug_429416}
540ca7e4 3646I discovered
128adb4f
EK
3647this bug during a batch change on the \type{org.eclipse.jdt.ui} project.
3648
3649\subsection{The bug}
fe0a4c48 3650This bug surfaces when trying to use the \refa{Move Method} refactoring to move a
94bb49f0 3651method from an anonymous class to another class. This happens both for my
fe0a4c48
EK
3652simulation as well as in \name{Eclipse}, through the user interface. It only occurs
3653when \name{Eclipse} analyzes the program and finds it necessary to pass an instance of
94bb49f0 3654the originating class as a parameter to the moved method. I.e. it want to pass a
128adb4f
EK
3655\var{this} expression. The execution ends in an
3656\typewithref{java.lang}{IllegalArgumentException} in
3657\typewithref{org.eclipse.jdt.core.dom}{SimpleName} and its
3658\method{setIdentifier(String)} method. The simple name is attempted created in
3659the method
3660\methodwithref{org.eclipse.jdt.internal.corext.refactoring.structure.\\MoveInstanceMethodProcessor}{createInlinedMethodInvocation}
3661so the \type{MoveInstanceMethodProcessor} was early a clear suspect.
3662
3663The \method{createInlinedMethodInvocation} is the method that creates a method
3664invocation where the previous invocation to the method that was moved was. From
3665its code it can be read that when a \var{this} expression is going to be passed
3666in to the invocation, it shall be qualified with the name of the original
3ab3e132 3667method's declaring class, if the declaring class is either an anonymous class or
128adb4f
EK
3668a member class. The problem with this, is that an anonymous class does not have
3669a name, hence the term \emph{anonymous} class! Therefore, when its name, an
3670empty string, is passed into
3671\methodwithref{org.eclipse.jdt.core.dom.AST}{newSimpleName} it all ends in an
540ca7e4
EK
3672\type{IllegalArgumentException}.
3673\submittedBugReport{https://bugs.eclipse.org/bugs/show\_bug.cgi?id=429416}
128adb4f
EK
3674
3675\subsection{How I solved the problem}
3676Since the \type{MoveInstanceMethodProcessor} is instantiated in the
3677\typewithref{no.uio.ifi.refaktor.change.executors}{MoveMethod\-RefactoringExecutor},
3678and only need to be a
3679\typewithref{org.eclipse.ltk.core.refactoring.participants}{MoveProcessor}, I
3680was able to copy the code for the original move processor and modify it so that
3681it works better for me. It is now called
f1b6174d 3682\typewithref{no.uio.ifi.refaktor.change.processors}{ModifiedMoveInstanceMethodProcessor}.
128adb4f
EK
3683The only modification done (in addition to some imports and suppression of
3684warnings), is in the \method{createInlinedMethodInvocation}. When the declaring
3685class of the method to move is anonymous, the \var{this} expression in the
3686parameter list is not qualified with the declaring class' (empty) name.
3687
3727b75b
EK
3688\section{Eclipse bug 429954: Extracting statement with reference to local type
3689breaks code}\label{eclipse_bug_429954}
540ca7e4 3690The bug
a6415293 3691was discovered when doing some changes to the way unfixes is computed.
3727b75b
EK
3692
3693\subsection{The bug}
fe0a4c48 3694The problem is that \name{Eclipse} is allowing selections that references variables of
3727b75b
EK
3695local types to be extracted. When this happens the code is broken, since the
3696extracted method must take a parameter of a local type that is not in the
3697methods scope. The problem is illustrated in
540ca7e4
EK
3698\myref{lst:extractMethod_LocalClass}, but there in another setting.
3699\submittedBugReport{https://bugs.eclipse.org/bugs/show\_bug.cgi?id=429954}
3727b75b
EK
3700
3701\subsection{Actions taken}
3702There are no actions directly springing out of this bug, since the Extract
a6415293 3703Method refactoring cannot be meant to be this way. This is handled on the
fe0a4c48 3704analysis stage of our \refa{Extract and Move Method} refactoring. So names representing
3727b75b
EK
3705variables of local types is considered unfixes \see{unfixes}.
3706\todoin{write more when fixing this in legal statements checker}
3707
d516ac0b
EK
3708\chapter{Conclusions and Future Work}
3709\todoin{Write}
0e6e57d3 3710
d516ac0b 3711\section{Future work}
0e6e57d3
EK
3712\todoin{Copied from introduction:}
3713For the metrics, I will at least measure the \metr{Coupling between object
3714classes} (CBO) metric that is described by Chidamber and Kemerer in their
3715article \tit{A Metrics Suite for Object Oriented
3716Design}\citing{metricsSuite1994}.
3717
3718\ldots
3719
3720Then the effect of the change must be measured by calculating the chosen
3721software metrics both before and after the execution.
d516ac0b
EK
3722\todoin{Metrics, \ldots}
3723
0d7fbd88
EK
3724\chapter{Related Work}
3725
3726\section{The compositional paradigm of refactoring}
3727This paradigm builds upon the observation of Vakilian et
3728al.\citing{vakilian2012}, that of the many automated refactorings existing in
3729modern IDEs, the simplest ones are dominating the usage statistics. The report
fe0a4c48 3730mainly focuses on \name{Eclipse} as the tool under investigation.
0d7fbd88
EK
3731
3732The paradigm is described almost as the opposite of automated composition of
3733refactorings \see{compositeRefactorings}. It works by providing the programmer
3734with easily accessible primitive refactorings. These refactorings shall be
3735accessed via keyboard shortcuts or quick-assist menus\footnote{Think
fe0a4c48 3736quick-assist with Ctrl+1 in \name{Eclipse}} and be promptly executed, opposed to in the
3ab3e132 3737currently dominating wizard-based refactoring paradigm. They are meant to
0d7fbd88
EK
3738stimulate composing smaller refactorings into more complex changes, rather than
3739doing a large upfront configuration of a wizard-based refactoring, before
3740previewing and executing it. The compositional paradigm of refactoring is
3741supposed to give control back to the programmer, by supporting \himher with an
3742option of performing small rapid changes instead of large changes with a lesser
3743degree of control. The report authors hope this will lead to fewer unsuccessful
3744refactorings. It also could lower the bar for understanding the steps of a
3745larger composite refactoring and thus also help in figuring out what goes wrong
3746if one should choose to op in on a wizard-based refactoring.
3747
3748Vakilian and his associates have performed a survey of the effectiveness of the
3749compositional paradigm versus the wizard-based one. They claim to have found
3750evidence of that the \emph{compositional paradigm} outperforms the
3751\emph{wizard-based}. It does so by reducing automation, which seem
3752counterintuitive. Therefore they ask the question ``What is an appropriate level
3753of automation?'', and thus questions what they feel is a rush toward more
3754automation in the software engineering community.
3755
3756
9ff90080 3757\backmatter{}
fe0a4c48 3758\printglossaries
9ff90080 3759\printbibliography
055dca93 3760\listoftodos
9ff90080 3761\end{document}