]> git.uio.no Git - ifi-stolz-refaktor.git/blame - thesis/master-thesis-erlenkr.tex
Thesis: fixing page numbering
[ifi-stolz-refaktor.git] / thesis / master-thesis-erlenkr.tex
CommitLineData
c8088eec 1\documentclass[USenglish,11pt]{ifimaster}
571ef294 2\usepackage{import}
7c28933b 3\usepackage[utf8]{inputenc}
9ff90080 4\usepackage[T1]{fontenc,url}
3510e539 5\usepackage{lmodern} % using Latin Modern to be able to use bold typewriter font
b4e539f7 6%\usepackage{mathpazo}
9ff90080 7\urlstyle{sf}
128adb4f 8\usepackage{listings}
58467004 9\usepackage{booktabs}
4e468834 10\usepackage{tabularx}
d11bcf4d
EK
11\usepackage{tikz}
12\usepackage{tikz-qtree}
5e5908eb 13\usetikzlibrary{shapes,snakes,trees,arrows,shadows,positioning,calc}
ae138b9d
EK
14\usepackage{babel,textcomp,csquotes,ifimasterforside}
15
16\usepackage{varioref}
79d5c2a9 17\usepackage[hidelinks]{hyperref}
8b6b22c8 18\usepackage{cleveref}
b4a197fd 19\usepackage[xindy]{glossaries}
ae138b9d 20
4ea21603 21\usepackage[style=alphabetic,backend=biber,doi=false,isbn=false]{biblatex}
12c254af 22\usepackage{amsthm}
3471cd15 23\usepackage{mathtools}
0ab2e134
EK
24\usepackage{graphicx}
25% use 'disable' before printing:
26\usepackage[]{todonotes}
0d7fbd88
EK
27\usepackage{xspace}
28\usepackage{he-she}
b289552b 29\usepackage{verbatim}
ddcea0b5 30\usepackage{minted}
347ed677 31\usepackage{multicol}
ddcea0b5 32\usemintedstyle{bw}
53be7239
EK
33
34\def\mintedframesep{11pt}
35
8fae7b44
EK
36\usepackage{perpage} %the perpage package
37\MakePerPage{footnote} %the perpage package command
9ff90080 38
6c51af15 39\theoremstyle{definition}
12c254af 40\newtheorem*{wordDef}{Definition}
3471cd15 41\newtheorem*{theorem}{Theorem}
12c254af 42
ddcea0b5
EK
43\graphicspath{ {./figures/} }
44
8b6b22c8 45\newcommand{\citing}[1]{~\cite{#1}}
e123ab03
EK
46%\newcommand{\myref}[1]{\cref{#1} on \cpageref{#1}}
47\newcommand{\myref}[1]{\vref{#1}}
4306ef44 48\newcommand{\Myref}[1]{\Vref{#1}}
8b6b22c8 49
b4a197fd 50%\newcommand{\glossref}[1]{\textsuperscript{(\glsrefentry{#1})}}
20bcc7bf
EK
51%\newcommand{\gloss}[1]{\gls{#1}\glossref{#1}}
52%\newcommand{\glosspl}[1]{\glspl{#1}\glossref{#1}}
53\newcommand{\gloss}[1]{\gls{#1}}
54\newcommand{\glosspl}[1]{\glspl{#1}}
fe0a4c48 55
12c254af 56\newcommand{\definition}[1]{\begin{wordDef}#1\end{wordDef}}
8b6b22c8 57\newcommand{\see}[1]{(see \myref{#1})}
137e0e7b
EK
58\newcommand{\explanation}[3]{\noindent\textbf{\textit{#1}}\\*\emph{When:}
59#2\\*\emph{How:} #3\\*[-7px]}
4e135659 60
128adb4f 61%\newcommand{\type}[1]{\lstinline{#1}}
03674629
EK
62\newcommand{\code}[1]{\texttt{\textbf{#1}}}
63\newcommand{\type}[1]{\code{#1}}
f041551b
EK
64\newcommand{\typeref}[1]{\footnote{\type{#1}}}
65\newcommand{\typewithref}[2]{\type{#2}\typeref{#1.#2}}
66\newcommand{\method}[1]{\type{#1}}
67\newcommand{\methodref}[2]{\footnote{\type{#1}\method{\##2()}}}
68\newcommand{\methodwithref}[2]{\method{#2}\footnote{\type{#1}\method{\##2()}}}
3510e539 69\newcommand{\var}[1]{\type{#1}}
9ff90080 70
fe0a4c48
EK
71\newcommand{\name}[1]{#1}
72\newcommand{\tit}[1]{\emph{#1}}
73\newcommand{\refa}[1]{\emph{#1}}
74\newcommand{\pattern}[1]{\emph{#1}}
75\newcommand{\metr}[1]{\emph{#1}}
76\newcommand{\ExtractMethod}{\refa{Extract Method}\xspace}
77\newcommand{\MoveMethod}{\refa{Move Method}\xspace}
78\newcommand{\ExtractAndMoveMethod}{\refa{Extract and Move Method}\xspace}
b5c7bb1b 79
1c521a77 80\newcommand{\m}[1]{$#1$}
46f52f52 81
7125ceac 82\newcommand\todoin[2][]{\todo[inline, caption={#2}, #1]{
4e135659
EK
83\begin{minipage}{\textwidth-4pt}#2\end{minipage}}}
84
cbc3b044 85\title{Automated Composition of Refactorings}
b4d90424 86\subtitle{Implementing and evaluating a search-based Extract and Move Method
01d46361 87refactoring}
7c28933b
EK
88\author{Erlend Kristiansen}
89
fe0a4c48
EK
90\makeglossaries
91\newglossaryentry{profiling}
92{
93 name=profiling,
94 description={is to run a computer program through a profiler/with a profiler
95 attached}
96}
20bcc7bf
EK
97\newglossaryentry{profiler}
98{
99 name=profiler,
100 description={A profiler is a program for analyzing performance within an
101 application. It is used to analyze memory consumption, processing time and
60065669 102frequency of procedure calls and such}
20bcc7bf
EK
103}
104\newglossaryentry{xUnit}
105{
106 name={xUnit framework},
107 description={An xUnit framework is a framework for writing unit tests for a
108 computer program. It follows the patterns known from the JUnit framework for
109 Java\citing{fowlerXunit}
110 },
111 plural={xUnit frameworks}
112}
fe0a4c48
EK
113\newglossaryentry{softwareObfuscation}
114{
115 name={software obfuscation},
116 description={makes source code harder to read and analyze, while preserving
117 its semantics}
118}
119\newglossaryentry{extractClass}
120{
121 name=\refa{Extract Class},
122 description={The \refa{Extract Class} refactoring works by creating a class,
123for then to move members from another class to that class and access them from
124the old class via a reference to the new class}
125}
126\newglossaryentry{designPattern}
127{
128 name={design pattern},
129 description={A design pattern is a named abstraction, that is meant to solve a
130 general design problem. It describes the key aspects of a common problem and
131identifies its participators and how they collaborate},
132 plural={design patterns}
133}
1c521a77
EK
134\newglossaryentry{enclosingClass}
135{
136 name={enclosing class},
137 description={An enclosing class is the class that surrounds any specific piece
138 of code that is written in the inner scope of this class},
139}
81cf9554
EK
140\newglossaryentry{mementoPattern}
141{
142 name={memento pattern},
143 description={The memento pattern is a software design pattern that is used to
144 capture an object's internal state so that it can be restored to this state
145 later\citing{designPatterns}},
146}
b4a197fd
EK
147%\newglossaryentry{extractMethod}
148%{
149% name=\refa{Extract Method},
150% description={The \refa{Extract Method} refactoring is used to extract a
151%fragment of code from its context and into a new method. A call to the new
152%method is inlined where the fragment was before. It is used to break code into
153%logical units, with names that explain their purpose}
154%}
155%\newglossaryentry{moveMethod}
156%{
157% name=\refa{Move Method},
158% description={The \refa{Move Method} refactoring is used to move a method from
159% one class to another. This is useful if the method is using more features of
160% another class than of the class which it is currently defined. Then all calls
161% to this method must be updated, or the method must be copied, with the old
162%method delegating to the new method}
163%}
f5fb40e4 164
7c28933b 165\bibliography{bibliography/master-thesis-erlenkr-bibliography}
6fabec9b
EK
166\DefineBibliographyStrings{english}{%
167 bibliography = {References},
168}
9ff90080 169
c8088eec
EK
170% UML comment in TikZ:
171% ref: https://tex.stackexchange.com/questions/103688/folded-paper-shape-tikz
43ca7be4
EK
172\makeatletter
173\pgfdeclareshape{umlcomment}{
174 \inheritsavedanchors[from=rectangle] % this is nearly a rectangle
175 \inheritanchorborder[from=rectangle]
176 \inheritanchor[from=rectangle]{center}
177 \inheritanchor[from=rectangle]{north}
178 \inheritanchor[from=rectangle]{south}
179 \inheritanchor[from=rectangle]{west}
180 \inheritanchor[from=rectangle]{east}
181 % ... and possibly more
182 \backgroundpath{% this is new
183 % store lower right in xa/ya and upper right in xb/yb
184 \southwest \pgf@xa=\pgf@x \pgf@ya=\pgf@y
185 \northeast \pgf@xb=\pgf@x \pgf@yb=\pgf@y
186 % compute corner of ‘‘flipped page’’
187 \pgf@xc=\pgf@xb \advance\pgf@xc by-10pt % this should be a parameter
188 \pgf@yc=\pgf@yb \advance\pgf@yc by-10pt
189 % construct main path
190 \pgfpathmoveto{\pgfpoint{\pgf@xa}{\pgf@ya}}
191 \pgfpathlineto{\pgfpoint{\pgf@xa}{\pgf@yb}}
192 \pgfpathlineto{\pgfpoint{\pgf@xc}{\pgf@yb}}
193 \pgfpathlineto{\pgfpoint{\pgf@xb}{\pgf@yc}}
194 \pgfpathlineto{\pgfpoint{\pgf@xb}{\pgf@ya}}
195 \pgfpathclose
196 % add little corner
197 \pgfpathmoveto{\pgfpoint{\pgf@xc}{\pgf@yb}}
198 \pgfpathlineto{\pgfpoint{\pgf@xc}{\pgf@yc}}
199 \pgfpathlineto{\pgfpoint{\pgf@xb}{\pgf@yc}}
200 \pgfpathlineto{\pgfpoint{\pgf@xc}{\pgf@yc}}
201 }
202}
203\makeatother
204
205\tikzstyle{comment}=[%
206 draw,
207 drop shadow,
208 fill=white,
209 align=center,
210 shape=document,
211 minimum width=20mm,
212 minimum height=10mm,
213 shape=umlcomment,
214 inner sep=2ex,
215 font=\ttfamily,
216]
217
f5fb40e4
EK
218%\interfootnotelinepenalty=10000
219
c9488804
EK
220% Space between table rows
221\renewcommand{\arraystretch}{1.3}
6fabec9b 222% Multicolumns
bc7b5d67 223\newcommand{\spancols}[2]{\multicolumn{#1}{@{}l@{}}{#2}}
6fabec9b
EK
224% Column types
225\newcolumntype{L}[1]{>{\hsize=#1\hsize\raggedright\arraybackslash}X}%
226\newcolumntype{R}[1]{>{\hsize=#1\hsize\raggedleft\arraybackslash}X}%
c9488804 227
9ff90080 228\begin{document}
0cbeeadb
EK
229%\pagenumbering{arabic}
230\mainmatter
531c4132 231\ififorside
0cbeeadb 232%\frontmatter{}
9ff90080 233
0cbeeadb 234%\setcounter{page}{3}
9ff90080
EK
235
236\chapter*{Abstract}
064227e3
EK
237\todoin{\textbf{Remove all todos (including list) before delivery/printing!!!
238Can be done by removing ``draft'' from documentclass.}}
889ba93e 239\todoin{Write abstract}
9ff90080
EK
240
241\tableofcontents{}
242\listoffigures{}
243\listoftables{}
244
0cbeeadb
EK
245%\mainmatter
246%\setcounter{page}{13}
247
af21f437
EK
248\chapter{Introduction}
249\section{Motivation and structure}
00aa0588 250
b4d90424 251\section{What is refactoring?}
7c28933b 252
f3a108c3
EK
253This question is best answered by first defining the concept of a
254\emph{refactoring}, what it is to \emph{refactor}, and then discuss what aspects
a1bafe90 255of programming make people want to refactor their code.
00aa0588 256
b4d90424 257\subsection{Defining refactoring}
a1bafe90 258Martin Fowler, in his classic book on refactoring\citing{refactoring}, defines a
00aa0588 259refactoring like this:
ee45c41f 260
00aa0588 261\begin{quote}
aa1e3779
EK
262 \emph{Refactoring} (noun): a change made to the internal
263 structure\footnote{The structure observable by the programmer.} of software to
264 make it easier to understand and cheaper to modify without changing its
265 observable behavior.~\cite[p.~53]{refactoring}
00aa0588 266\end{quote}
ee45c41f 267
a1bafe90 268\noindent This definition assigns additional meaning to the word
ee45c41f
EK
269\emph{refactoring}, beyond the composition of the prefix \emph{re-}, usually
270meaning something like ``again'' or ``anew'', and the word \emph{factoring},
6c51af15
EK
271that can mean to isolate the \emph{factors} of something. Here a \emph{factor}
272would be close to the mathematical definition of something that divides a
273quantity, without leaving a remainder. Fowler is mixing the \emph{motivation}
a1bafe90
EK
274behind refactoring into his definition. Instead it could be more refined, formed
275to only consider the \emph{mechanical} and \emph{behavioral} aspects of
276refactoring. That is to factor the program again, putting it together in a
277different way than before, while preserving the behavior of the program. An
278alternative definition could then be:
6c51af15
EK
279
280\definition{A \emph{refactoring} is a transformation
8fae7b44 281done to a program without altering its external behavior.}
00aa0588 282
ee1d883a
EK
283From this we can conclude that a refactoring primarily changes how the
284\emph{code} of a program is perceived by the \emph{programmer}, and not the
740e1b6c
EK
285\emph{behavior} experienced by any user of the program. Although the logical
286meaning is preserved, such changes could potentially alter the program's
287behavior when it comes to performance gain or -penalties. So any logic depending
288on the performance of a program could make the program behave differently after
289a refactoring.
00aa0588 290
fe0a4c48
EK
291In the extreme case one could argue that \gloss{softwareObfuscation} is
292refactoring. It is often used to protect proprietary software. It restrains
293uninvited viewers, so they have a hard time analyzing code that they are not
294supposed to know how works. This could be a problem when using a language that
295is possible to decompile, such as Java.
b4e539f7
EK
296
297Obfuscation could be done composing many, more or less randomly chosen,
298refactorings. Then the question arises whether it can be called a
299\emph{composite refactoring} or not \see{compositeRefactorings}? The answer is
300not obvious. First, there is no way to describe the mechanics of software
301obfuscation, because there are infinitely many ways to do that. Second,
302obfuscation can be thought of as \emph{one operation}: Either the code is
303obfuscated, or it is not. Third, it makes no sense to call software obfuscation
304\emph{a refactoring}, since it holds different meaning to different people.
305
306This last point is important, since one of the motivations behind defining
307different refactorings, is to establish a \emph{vocabulary} for software
308professionals to use when reasoning about and discussing programs, similar to
fe0a4c48 309the motivation behind \glosspl{designPattern}\citing{designPatterns}.
b4e539f7
EK
310\begin{comment}
311So for describing \emph{software obfuscation}, it might be more appropriate to
312define what you do when performing it rather than precisely defining its
313mechanics in terms of other refactorings.
314\end{comment}
00aa0588 315
b4d90424 316\subsection{The etymology of 'refactoring'}
f3a108c3
EK
317It is a little difficult to pinpoint the exact origin of the word
318``refactoring'', as it seems to have evolved as part of a colloquial
319terminology, more than a scientific term. There is no authoritative source for a
320formal definition of it.
321
b5c7bb1b 322According to Martin Fowler\citing{etymology-refactoring}, there may also be more
f3a108c3 323than one origin of the word. The most well-known source, when it comes to the
b4e539f7
EK
324origin of \emph{refactoring}, is the
325Smalltalk\footnote{\label{footNote}Programming language} community and their
fe0a4c48 326infamous \name{Refactoring
f3a108c3 327Browser}\footnote{\url{http://st-www.cs.illinois.edu/users/brant/Refactory/RefactoringBrowser.html}}
fe0a4c48 328described in the article \tit{A Refactoring Tool for
b5c7bb1b
EK
329Smalltalk}\citing{refactoringBrowser1997}, published in 1997.
330Allegedly\citing{etymology-refactoring}, the metaphor of factoring programs was
b4e539f7 331also present in the Forth\textsuperscript{\ref{footNote}} community, and the
fe0a4c48
EK
332word ``refactoring'' is mentioned in a book by Leo Brodie, called \tit{Thinking
333Forth}\citing{brodie2004}, first published in 1984\footnote{\tit{Thinking Forth}
334was first published in 1984 by the \name{Forth Interest Group}. Then it was
335reprinted in 1994 with minor typographical corrections, before it was
b4e539f7
EK
336transcribed into an electronic edition typeset in \LaTeX\ and published under a
337Creative Commons licence in
3382004. The edition cited here is the 2004 edition, but the content should
339essentially be as in 1984.}. The exact word is only printed one
340place~\cite[p.~232]{brodie2004}, but the term \emph{factoring} is prominent in
341the book, that also contains a whole chapter dedicated to (re)factoring, and how
342to keep the (Forth) code clean and maintainable.
ee45c41f 343
f3a108c3
EK
344\begin{quote}
345 \ldots good factoring technique is perhaps the most important skill for a
3a154bb7 346 Forth programmer.~\cite[p.~172]{brodie2004}
f3a108c3 347\end{quote}
ee45c41f
EK
348
349\noindent Brodie also express what \emph{factoring} means to him:
350
f3a108c3
EK
351\begin{quote}
352 Factoring means organizing code into useful fragments. To make a fragment
353 useful, you often must separate reusable parts from non-reusable parts. The
354 reusable parts become new definitions. The non-reusable parts become arguments
3a154bb7 355 or parameters to the definitions.~\cite[p.~172]{brodie2004}
f3a108c3
EK
356\end{quote}
357
358Fowler claims that the usage of the word \emph{refactoring} did not pass between
fe0a4c48 359the \name{Forth} and \name{Smalltalk} communities, but that it emerged
f3a108c3
EK
360independently in each of the communities.
361
b4d90424 362\subsection{Reasons for refactoring}
2f6e6dec
EK
363There are many reasons why people want to refactor their programs. They can for
364instance do it to remove duplication, break up long methods or to introduce
b4e539f7
EK
365design patterns into their software systems. The shared trait for all these are
366that peoples' intentions are to make their programs \emph{better}, in some
367sense. But what aspects of their programs are becoming improved?
368
369As just mentioned, people often refactor to get rid of duplication. They are
370moving identical or similar code into methods, and are pushing methods up or
371down in their class hierarchies. They are making template methods for
372overlapping algorithms/functionality, and so on. It is all about gathering what
373belongs together and putting it all in one place. The resulting code is then
374easier to maintain. When removing the implicit coupling\footnote{When
375 duplicating code, the duplicate pieces of code might not be coupled, apart
376from representing the same functionality. So if this functionality is going to
377change, it might need to change in more than one place, thus creating an
378implicit coupling between multiple pieces of code.} between code snippets, the
137e0e7b 379location of a bug is limited to only one place, and new functionality need only
a1bafe90
EK
380to be added to this one place, instead of a number of places people might not
381even remember.
382
383A problem you often encounter when programming, is that a program contains a lot
384of long and hard-to-grasp methods. It can then help to break the methods into
b4a197fd
EK
385smaller ones, using the \ExtractMethod refactoring\citing{refactoring}. Then
386you may discover something about a program that you were not aware of before;
387revealing bugs you did not know about or could not find due to the complex
078b1e4a
EK
388structure of your program. Making the methods smaller and giving good names to
389the new ones clarifies the algorithms and enhances the \emph{understandability}
390of the program \see{magic_number_seven}. This makes refactoring an excellent
391method for exploring unknown program code, or code that you had forgotten that
392you wrote.
a1bafe90 393
b4e539f7
EK
394Most primitive refactorings are simple, and usually involves moving code
395around\citing{kerievsky2005}. The motivation behind them may first be revealed
396when they are combined into larger --- higher level --- refactorings, called
a1bafe90 397\emph{composite refactorings} \see{compositeRefactorings}. Often the goal of
b4e539f7
EK
398such a series of refactorings is a design pattern. Thus the design can
399\emph{evolve} throughout the lifetime of a program, as opposed to designing
400up-front. It is all about being structured and taking small steps to improve a
401program's design.
a1bafe90
EK
402
403Many software design pattern are aimed at lowering the coupling between
404different classes and different layers of logic. One of the most famous is
fe0a4c48
EK
405perhaps the \pattern{Model-View-Controller}\citing{designPatterns} pattern. It
406is aimed at lowering the coupling between the user interface, the business logic
b4e539f7
EK
407and the data representation of a program. This also has the added benefit that
408the business logic could much easier be the target of automated tests, thus
409increasing the productivity in the software development process.
0b0567f2
EK
410
411Another effect of refactoring is that with the increased separation of concerns
a1bafe90
EK
412coming out of many refactorings, the \emph{performance} can be improved. When
413profiling programs, the problematic parts are narrowed down to smaller parts of
414the code, which are easier to tune, and optimization can be performed only where
b4e539f7 415needed and in a more effective way\citing{refactoring}.
137e0e7b 416
b01d328a
EK
417Last, but not least, and this should probably be the best reason to refactor, is
418to refactor to \emph{facilitate a program change}. If one has managed to keep
419one's code clean and tidy, and the code is not bloated with design patterns that
a1bafe90 420are not ever going to be needed, then some refactoring might be needed to
b01d328a
EK
421introduce a design pattern that is appropriate for the change that is going to
422happen.
423
137e0e7b
EK
424Refactoring program code --- with a goal in mind --- can give the code itself
425more value. That is in the form of robustness to bugs, understandability and
a1bafe90
EK
426maintainability. Having robust code is an obvious advantage, but
427understandability and maintainability are both very important aspects of
428software development. By incorporating refactoring in the development process,
429bugs are found faster, new functionality is added more easily and code is easier
430to understand by the next person exposed to it, which might as well be the
431person who wrote it. The consequence of this, is that refactoring can increase
432the average productivity of the development process, and thus also add to the
433monetary value of a business in the long run. The perspective on productivity
434and money should also be able to open the eyes of the many nearsighted managers
435that seldom see beyond the next milestone.
137e0e7b 436
b4d90424 437\subsection{The magical number seven}\label{magic_number_seven}
fe0a4c48
EK
438The article \tit{The magical number seven, plus or minus two: some limits on our
439capacity for processing information}\citing{miller1956} by George A. Miller,
440was published in the journal \name{Psychological Review} in 1956. It presents
441evidence that support that the capacity of the number of objects a human being
442can hold in its working memory is roughly seven, plus or minus two objects. This
443number varies a bit depending on the nature and complexity of the objects, but
444is according to Miller ``\ldots never changing so much as to be
f4cea2d6
EK
445unrecognizable.''
446
447Miller's article culminates in the section called \emph{Recoding}, a term he
448borrows from communication theory. The central result in this section is that by
449recoding information, the capacity of the amount of information that a human can
450process at a time is increased. By \emph{recoding}, Miller means to group
b4e539f7
EK
451objects together in chunks, and give each chunk a new name that it can be
452remembered by.
f4cea2d6
EK
453
454\begin{quote}
455 \ldots recoding is an extremely powerful weapon for increasing the amount of
4cb06723 456 information that we can deal with.~\cite[p.~95]{miller1956}
f4cea2d6 457\end{quote}
ee45c41f 458
b4e539f7
EK
459By organizing objects into patterns of ever growing depth, one can memorize and
460process a much larger amount of data than if it were to be represented as its
461basic pieces. This grouping and renaming is analogous to how many refactorings
462work, by grouping pieces of code and give them a new name. Examples are the
fe0a4c48 463fundamental \ExtractMethod and \refa{Extract Class}
b4e539f7
EK
464refactorings\citing{refactoring}.
465
a1bafe90 466An example from the article addresses the problem of memorizing a sequence of
b4e539f7
EK
467binary digits. The example presented here is a slightly modified version of the
468one presented in the original article\citing{miller1956}, but it preserves the
3ab3e132 469essence of it. Let us say we have the following sequence of
f4cea2d6
EK
47016 binary digits: ``1010001001110011''. Most of us will have a hard time
471memorizing this sequence by only reading it once or twice. Imagine if we instead
472translate it to this sequence: ``A273''. If you have a background from computer
b4e539f7
EK
473science, it will be obvious that the latter sequence is the first sequence
474recoded to be represented by digits in base 16. Most people should be able to
f4cea2d6
EK
475memorize this last sequence by only looking at it once.
476
477Another result from the Miller article is that when the amount of information a
478human must interpret increases, it is crucial that the translation from one code
479to another must be almost automatic for the subject to be able to remember the
0d7fbd88 480translation, before \heshe is presented with new information to recode. Thus
f4cea2d6
EK
481learning and understanding how to best organize certain kinds of data is
482essential to efficiently handle that kind of data in the future. This is much
a1bafe90
EK
483like when humans learn to read. First they must learn how to recognize letters.
484Then they can learn distinct words, and later read sequences of words that form
485whole sentences. Eventually, most of them will be able to read whole books and
486briefly retell the important parts of its content. This suggest that the use of
b4e539f7
EK
487design patterns is a good idea when reasoning about computer programs. With
488extensive use of design patterns when creating complex program structures, one
489does not always have to read whole classes of code to comprehend how they
490function, it may be sufficient to only see the name of a class to almost fully
491understand its responsibilities.
f4cea2d6
EK
492
493\begin{quote}
494 Our language is tremendously useful for repackaging material into a few chunks
4cb06723 495 rich in information.~\cite[p.~95]{miller1956}
f4cea2d6 496\end{quote}
ee45c41f 497
a1bafe90 498Without further evidence, these results at least indicate that refactoring
f4cea2d6
EK
499source code into smaller units with higher cohesion and, when needed,
500introducing appropriate design patterns, should aid in the cause of creating
b4e539f7 501computer programs that are easier to maintain and have code that is easier (and
f4cea2d6
EK
502better) understood.
503
b4d90424 504\subsection{Notable contributions to the refactoring literature}
36d99783 505
d21ef41f
EK
506\begin{description}
507 \item[1992] William F. Opdyke submits his doctoral dissertation called
fe0a4c48
EK
508 \tit{Refactoring Object-Oriented Frameworks}\citing{opdyke1992}. This work
509 defines a set of refactorings, that are behavior preserving given that their
510 preconditions are met. The dissertation is focused on the automation of
511 refactorings.
512 \item[1999] Martin Fowler et al.: \tit{Refactoring: Improving the Design of
b5c7bb1b
EK
513 Existing Code}\citing{refactoring}. This is maybe the most influential text
514 on refactoring. It bares similarities with Opdykes thesis\citing{opdyke1992}
d21ef41f
EK
515 in the way that it provides a catalog of refactorings. But Fowler's book is
516 more about the craft of refactoring, as he focuses on establishing a
517 vocabulary for refactoring, together with the mechanics of different
518 refactorings and when to perform them. His methodology is also founded on
fe0a4c48
EK
519 the principles of test-driven development.
520 \item[2005] Joshua Kerievsky: \tit{Refactoring to
36d99783 521 Patterns}\citing{kerievsky2005}. This book is heavily influenced by Fowler's
fe0a4c48 522 \tit{Refactoring}\citing{refactoring} and the ``Gang of Four'' \tit{Design
36d99783
EK
523 Patterns}\citing{designPatterns}. It is building on the refactoring
524 catalogue from Fowler's book, but is trying to bridge the gap between
525 \emph{refactoring} and \emph{design patterns} by providing a series of
526 higher-level composite refactorings, that makes code evolve toward or away
fe0a4c48 527 from certain design patterns. The book is trying to build up the reader's
36d99783
EK
528 intuition around \emph{why} one would want to use a particular design
529 pattern, and not just \emph{how}. The book is encouraging evolutionary
e123ab03 530 design \see{relationToDesignPatterns}.
d21ef41f 531\end{description}
3b7c1d90 532
b4d90424 533\subsection{Tool support (for Java)}\label{toolSupport}
3ab3e132 534This section will briefly compare the refactoring support of the three IDEs
fe0a4c48
EK
535\name{Eclipse}\footnote{\url{http://www.eclipse.org/}}, \name{IntelliJ
536IDEA}\footnote{The IDE under comparison is the \name{Community Edition},
4e135659 537\url{http://www.jetbrains.com/idea/}} and
fe0a4c48 538\name{NetBeans}\footnote{\url{https://netbeans.org/}}. These are the most
4e135659
EK
539popular Java IDEs\citing{javaReport2011}.
540
541All three IDEs provide support for the most useful refactorings, like the
542different extract, move and rename refactorings. In fact, Java-targeted IDEs are
543known for their good refactoring support, so this did not appear as a big
544surprise.
545
546The IDEs seem to have excellent support for the \ExtractMethod refactoring, so
b4e539f7
EK
547at least they have all passed the first ``refactoring
548rubicon''\citing{fowlerRubicon2001,secondRubicon2012}.
4e135659 549
b4a197fd
EK
550Regarding the \MoveMethod refactoring, the \name{Eclipse} and \name{IntelliJ}
551IDEs do the job in very similar manners. In most situations they both do a
552satisfying job by producing the expected outcome. But they do nothing to check
553that the result does not break the semantics of the program \see{correctness}.
fe0a4c48 554The \name{NetBeans} IDE implements this refactoring in a somewhat
b4e539f7
EK
555unsophisticated way. For starters, the refactoring's default destination for the
556move, is the same class as the method already resides in, although it refuses to
557perform the refactoring if chosen. But the worst part is, that if moving the
558method \method{f} of the class \type{C} to the class \type{X}, it will break the
559code. The result is shown in \myref{lst:moveMethod_NetBeans}.
4e135659 560
347ed677
EK
561\begin{listing}
562\begin{multicols}{2}
4e135659
EK
563\begin{minted}[samepage]{java}
564public class C {
565 private X x;
566 ...
567 public void f() {
568 x.m();
569 x.n();
570 }
571}
572\end{minted}
573
347ed677 574\columnbreak
4e135659
EK
575
576\begin{minted}[samepage]{java}
577public class X {
578 ...
579 public void f(C c) {
580 c.x.m();
581 c.x.n();
582 }
583}
584\end{minted}
347ed677
EK
585\end{multicols}
586\caption{Moving method \method{f} from \type{C} to \type{X}.}
587\label{lst:moveMethod_NetBeans}
588\end{listing}
4e135659 589
fe0a4c48 590\name{NetBeans} will try to create code that call the methods \method{m} and \method{n}
4e135659 591of \type{X} by accessing them through \var{c.x}, where \var{c} is a parameter of
a1bafe90
EK
592type \type{C} that is added the method \method{f} when it is moved. (This is
593seldom the desired outcome of this refactoring, but ironically, this ``feature''
fe0a4c48 594keeps \name{NetBeans} from breaking the code in the example from \myref{correctness}.)
8b6b22c8 595If \var{c.x} for some reason is inaccessible to \type{X}, as in this case, the
fe0a4c48 596refactoring breaks the code, and it will not compile. \name{NetBeans} presents a
8b6b22c8
EK
597preview of the refactoring outcome, but the preview does not catch it if the IDE
598is about break the program.
4778044b 599
b4e539f7 600The IDEs under investigation seem to have fairly good support for primitive
fe0a4c48
EK
601refactorings, but what about more complex ones, such as
602\gloss{extractClass}\citing{refactoring}? \name{IntelliJ} handles this in a
603fairly good manner, although, in the case of private methods, it leaves unused
a1bafe90 604methods behind. These are methods that delegate to a field with the type of the
fe0a4c48
EK
605new class, but are not used anywhere. \name{Eclipse} has added its own quirk to
606the \refa{Extract Class} refactoring, and only allows for \emph{fields} to be
607moved to a new class, \emph{not methods}. This makes it effectively only
608extracting a data structure, and calling it \refa{Extract Class} is a little
b4e539f7 609misleading. One would often be better off with textual extract and paste than
fe0a4c48
EK
610using the \refa{Extract Class} refactoring in \name{Eclipse}. When it comes to
611\name{NetBeans}, it does not even show an attempt on providing this refactoring.
4778044b 612
b4d90424 613\subsection{The relation to design patterns}\label{relationToDesignPatterns}
4cb06723 614
fe0a4c48
EK
615Refactoring and design patterns have at least one thing in common, they are both
616promoted by advocates of \emph{clean code}\citing{cleanCode} as fundamental
617tools on the road to more maintainable and extendable source code.
4cb06723
EK
618
619\begin{quote}
620 Design patterns help you determine how to reorganize a design, and they can
621 reduce the amount of refactoring you need to do
622 later.~\cite[p.~353]{designPatterns}
623\end{quote}
624
a1bafe90
EK
625Although sometimes associated with
626over-engineering\citing{kerievsky2005,refactoring}, design patterns are in
627general assumed to be good for maintainability of source code. That may be
d1adbeef
EK
628because many of them are designed to support the \emph{open/closed principle} of
629object-oriented programming. The principle was first formulated by Bertrand
630Meyer, the creator of the Eiffel programming language, like this: ``Modules
631should be both open and closed.''\citing{meyer1988} It has been popularized,
632with this as a common version:
633
634\begin{quote}
635 Software entities (classes, modules, functions, etc.) should be open for
636 extension, but closed for modification.\footnote{See
637 \url{http://c2.com/cgi/wiki?OpenClosedPrinciple} or
638 \url{https://en.wikipedia.org/wiki/Open/closed_principle}}
639\end{quote}
640
641Maintainability is often thought of as the ability to be able to introduce new
a1bafe90 642functionality without having to change too much of the old code. When
d1adbeef
EK
643refactoring, the motivation is often to facilitate adding new functionality. It
644is about factoring the old code in a way that makes the new functionality being
645able to benefit from the functionality already residing in a software system,
646without having to copy old code into new. Then, next time someone shall add new
a1bafe90
EK
647functionality, it is less likely that the old code has to change. Assuming that
648a design pattern is the best way to get rid of duplication and assist in
649implementing new functionality, it is reasonable to conclude that a design
650pattern often is the target of a series of refactorings. Having a repertoire of
651design patterns can also help in knowing when and how to refactor a program to
652make it reflect certain desired characteristics.
d1adbeef
EK
653
654\begin{quote}
a1bafe90 655 There is a natural relation between patterns and refactorings. Patterns are
d1adbeef
EK
656 where you want to be; refactorings are ways to get there from somewhere
657 else.~\cite[p.~107]{refactoring}
658\end{quote}
659
660This quote is wise in many contexts, but it is not always appropriate to say
a1bafe90
EK
661``Patterns are where you want to be\ldots''. \emph{Sometimes}, patterns are
662where you want to be, but only because it will benefit your design. It is not
663true that one should always try to incorporate as many design patterns as
664possible into a program. It is not like they have intrinsic value. They only add
665value to a system when they support its design. Otherwise, the use of design
666patterns may only lead to a program that is more complex than necessary.
d1adbeef
EK
667
668\begin{quote}
669 The overuse of patterns tends to result from being patterns happy. We are
670 \emph{patterns happy} when we become so enamored of patterns that we simply
671 must use them in our code.~\cite[p.~24]{kerievsky2005}
672\end{quote}
673
674This can easily happen when relying largely on up-front design. Then it is
675natural, in the very beginning, to try to build in all the flexibility that one
676believes will be necessary throughout the lifetime of a software system.
677According to Joshua Kerievsky ``That sounds reasonable --- if you happen to be
678psychic.''~\cite[p.~1]{kerievsky2005} He is advocating what he believes is a
679better approach: To let software continually evolve. To start with a simple
680design that meets today's needs, and tackle future needs by refactoring to
681satisfy them. He believes that this is a more economic approach than investing
682time and money into a design that inevitably is going to change. By relying on
683continuously refactoring a system, its design can be made simpler without
684sacrificing flexibility. To be able to fully rely on this approach, it is of
e123ab03 685utter importance to have a reliable suit of tests to lean on \see{testing}. This
d1adbeef
EK
686makes the design process more natural and less characterized by difficult
687decisions that has to be made before proceeding in the process, and that is
688going to define a project for all of its unforeseeable future.
689
b4d90424 690\subsection{The impact on software quality}
00aa0588 691
b4d90424 692\subsubsection{What is software quality?}
00aa0588 693The term \emph{software quality} has many meanings. It all depends on the
9a55a5bc 694context we put it in. If we look at it with the eyes of a software developer, it
a1bafe90 695usually means that the software is easily maintainable and testable, or in other
9a55a5bc
EK
696words, that it is \emph{well designed}. This often correlates with the
697management scale, where \emph{keeping the schedule} and \emph{customer
137e0e7b
EK
698satisfaction} is at the center. From the customers point of view, in addition to
699good usability, \emph{performance} and \emph{lack of bugs} is always
700appreciated, measurements that are also shared by the software developer. (In
701addition, such things as good documentation could be measured, but this is out
702of the scope of this document.)
9a55a5bc 703
b4d90424 704\subsubsection{The impact on performance}
9a55a5bc 705\begin{quote}
a1bafe90
EK
706 Refactoring certainly will make software go more slowly\footnote{With todays
707 compiler optimization techniques and performance tuning of e.g. the Java
708virtual machine, the penalties of object creation and method calls are
709debatable.}, but it also makes the software more amenable to performance
710tuning.~\cite[p.~69]{refactoring}
9a55a5bc 711\end{quote}
ee45c41f
EK
712
713\noindent There is a common belief that refactoring compromises performance, due
714to increased degree of indirection and that polymorphism is slower than
9a55a5bc
EK
715conditionals.
716
b5c7bb1b 717In a survey, Demeyer\citing{demeyer2002} disproves this view in the case of
a1bafe90 718polymorphism. He did an experiment on, what he calls, ``Transform Self Type
9a55a5bc
EK
719Checks'' where you introduce a new polymorphic method and a new class hierarchy
720to get rid of a class' type checking of a ``type attribute``. He uses this kind
721of transformation to represent other ways of replacing conditionals with
722polymorphism as well. The experiment is performed on the C++ programming
a1bafe90
EK
723language and with three different compilers and platforms. Demeyer concludes
724that, with compiler optimization turned on, polymorphism beats middle to large
725sized if-statements and does as well as case-statements. (In accordance with
726his hypothesis, due to similarities between the way C++ handles polymorphism and
727case-statements.)
ee45c41f 728
9a55a5bc
EK
729\begin{quote}
730 The interesting thing about performance is that if you analyze most programs,
b5c7bb1b 731 you find that they waste most of their time in a small fraction of the
4cb06723 732 code.~\cite[p.~70]{refactoring}
9a55a5bc 733\end{quote}
9a55a5bc 734
ee45c41f
EK
735\noindent So, although an increased amount of method calls could potentially
736slow down programs, one should avoid premature optimization and sacrificing good
fe0a4c48
EK
737design, leaving the performance tuning until after \gloss{profiling} the
738software and having isolated the actual problem areas.
00aa0588 739
b4d90424 740\subsection{Composite refactorings}\label{compositeRefactorings}
6065c96c 741Generally, when thinking about refactoring, at the mechanical level, there are
f65da046 742essentially two kinds of refactorings. There are the \emph{primitive}
a1bafe90 743refactorings, and the \emph{composite} refactorings.
6065c96c 744
6c51af15
EK
745\definition{A \emph{primitive refactoring} is a refactoring that cannot be
746expressed in terms of other refactorings.}
f65da046 747
fe0a4c48 748\noindent Examples are the \refa{Pull Up Field} and \refa{Pull Up
a1bafe90 749Method} refactorings\citing{refactoring}, that move members up in their class
ee45c41f
EK
750hierarchies.
751
6c51af15
EK
752\definition{A \emph{composite refactoring} is a refactoring that can be
753expressed in terms of two or more other refactorings.}
f65da046 754
fe0a4c48 755\noindent An example of a composite refactoring is the \refa{Extract
b5c7bb1b
EK
756Superclass} refactoring\citing{refactoring}. In its simplest form, it is composed
757of the previously described primitive refactorings, in addition to the
fe0a4c48 758\refa{Pull Up Constructor Body} refactoring\citing{refactoring}. It works
b5c7bb1b 759by creating an abstract superclass that the target class(es) inherits from, then
fe0a4c48
EK
760by applying \refa{Pull Up Field}, \refa{Pull Up Method} and
761\refa{Pull Up Constructor Body} on the members that are to be members of
f5fb40e4
EK
762the new superclass. If there are multiple classes in play, their interfaces may
763need to be united with the help of some rename refactorings, before extracting
fe0a4c48 764the superclass. For an overview of the \refa{Extract Superclass}
8b6b22c8 765refactoring, see \myref{fig:extractSuperclass}.
6065c96c 766
ddcea0b5
EK
767\begin{figure}[h]
768 \centering
faa9f4f3 769 \includegraphics[angle=270,width=\linewidth]{extractSuperclassItalic.pdf}
f5fb40e4 770 \caption{The Extract Superclass refactoring, with united interfaces.}
ddcea0b5
EK
771 \label{fig:extractSuperclass}
772\end{figure}
6065c96c 773
b4d90424 774\subsection{Manual vs. automated refactorings}
0d7fbd88 775Refactoring is something every programmer does, even if \heshe does not known
f65da046
EK
776the term \emph{refactoring}. Every refinement of source code that does not alter
777the program's behavior is a refactoring. For small refactorings, such as
0d7fbd88 778\ExtractMethod, executing it manually is a manageable task, but is still prone
a1bafe90 779to errors. Getting it right the first time is not easy, considering the method
f65da046
EK
780signature and all the other aspects of the refactoring that has to be in place.
781
f5fb40e4
EK
782Consider the renaming of classes, methods and fields. For complex programs these
783refactorings are almost impossible to get right. Attacking them with textual
784search and replace, or even regular expressions, will fall short on these tasks.
785Then it is crucial to have proper tool support that can perform them
786automatically. Tools that can parse source code and thus have semantic knowledge
787about which occurrences of which names belong to what construct in the program.
788For even trying to perform one of these complex task manually, one would have to
789be very confident on the existing test suite \see{testing}.
00aa0588 790
b4d90424 791\subsection{Correctness of refactorings}\label{correctness}
f65da046 792For automated refactorings to be truly useful, they must show a high degree of
f5fb40e4
EK
793behavior preservation. This last sentence might seem obvious, but there are
794examples of refactorings in existing tools that break programs. In an ideal
795world, every automated refactoring would be ``complete'', in the sense that it
796would never break a program. In an ideal world, every program would also be free
797from bugs. In modern IDEs the implemented automated refactorings are working for
798\emph{most} cases, that is enough for making them useful.
799
800I will now present an example of a \emph{corner case} where a program breaks
801when a refactoring is applied. The example shows an \ExtractMethod refactoring
802followed by a \MoveMethod refactoring that breaks a program in both the
fe0a4c48 803\name{Eclipse} and \name{IntelliJ} IDEs\footnote{The \name{NetBeans} IDE handles this
3ab3e132 804 particular situation without altering the program's behavior, mainly because
fe0a4c48 805 its \refa{Move Method} refactoring implementation is a bit flawed in other ways
f5fb40e4
EK
806 \see{toolSupport}.}. The target and the destination for the composed
807 refactoring is shown in \myref{lst:correctnessExtractAndMove}. Note that the
808 method \method{m(C c)} of class \type{X} assigns to the field \var{x} of the
809 argument \var{c} that has type \type{C}.
810
811\begin{listing}[h]
812\begin{multicols}{2}
53be7239
EK
813\begin{minted}[linenos,frame=topline,label={Refactoring
814 target},framesep=\mintedframesep]{java}
ddcea0b5 815public class C {
f5fb40e4 816 public X x = new X();
ee45c41f 817
f5fb40e4
EK
818 public void f() {
819 x.m(this);
820 // Not the same x
821 x.n();
822 }
ddcea0b5
EK
823}
824\end{minted}
ee45c41f 825
f5fb40e4 826\columnbreak
ee45c41f 827
53be7239
EK
828\begin{minted}[frame=topline,label={Method
829 destination},framesep=\mintedframesep]{java}
ee45c41f 830public class X {
f5fb40e4
EK
831 public void m(C c) {
832 c.x = new X();
833 // If m is called from
834 // c, then c.x no longer
835 // equals 'this'
836 }
837 public void n() {}
ee45c41f
EK
838}
839\end{minted}
f5fb40e4
EK
840\end{multicols}
841\caption{The target and the destination for the composition of the Extract
fe0a4c48 842Method and \refa{Move Method} refactorings.}
f5fb40e4
EK
843\label{lst:correctnessExtractAndMove}
844\end{listing}
ee45c41f 845
f5fb40e4
EK
846
847The refactoring sequence works by extracting line 6 through 8 from the original
3510e539 848class \type{C} into a method \method{f} with the statements from those lines as
f5fb40e4
EK
849its method body (but with the comment left out, since it will no longer hold any
850meaning). The method is then moved to the class \type{X}. The result is shown
851in \myref{lst:correctnessExtractAndMoveResult}.
ee45c41f 852
f5fb40e4
EK
853Before the refactoring, the methods \method{m} and \method{n} of class \type{X}
854are called on different object instances (see line 6 and 8 of the original class
855\type{C} in \cref{lst:correctnessExtractAndMove}). After the refactoring, they
856are called on the same object, and the statement on line
8573 of class \type{X} (in \cref{lst:correctnessExtractAndMoveResult}) no longer
858 has the desired effect in our example. The method \method{f} of class \type{C}
859 is now calling the method \method{f} of class \type{X} (see line 5 of class
860 \type{C} in \cref{lst:correctnessExtractAndMoveResult}), and the program now
861 behaves different than before.
862
863\begin{listing}[h]
864\begin{multicols}{2}
865\begin{minted}[linenos]{java}
ee45c41f
EK
866public class C {
867 public X x = new X();
868
869 public void f() {
870 x.f(this);
871 }
872}
873\end{minted}
874
f5fb40e4
EK
875\columnbreak
876
877\begin{minted}[linenos]{java}
ee45c41f
EK
878public class X {
879 public void m(C c) {
880 c.x = new X();
881 }
882 public void n() {}
f5fb40e4
EK
883 // Extracted and
884 // moved method
ee45c41f
EK
885 public void f(C c) {
886 m(c);
887 n();
888 }
889}
890\end{minted}
f5fb40e4
EK
891\end{multicols}
892\caption{The result of the composed refactoring.}
893\label{lst:correctnessExtractAndMoveResult}
894\end{listing}
ddcea0b5 895
aa1e3779
EK
896The bug introduced in the previous example is of such a nature\footnote{Caused
897 by aliasing. See \url{https://en.wikipedia.org/wiki/Aliasing_(computing)}}
898 that it is very difficult to spot if the refactored code is not covered by
899 tests. It does not generate compilation errors, and will thus only result in
900 a runtime error or corrupted data, which might be hard to detect.
19c4f27d 901
b4d90424 902\subsection{Refactoring and the importance of testing}\label{testing}
19c4f27d
EK
903\begin{quote}
904 If you want to refactor, the essential precondition is having solid
905 tests.\citing{refactoring}
906\end{quote}
907
29f39f29
EK
908When refactoring, there are roughly three classes of errors that can be made.
909The first class of errors are the ones that make the code unable to compile.
910These \emph{compile-time} errors are of the nicer kind. They flash up at the
911moment they are made (at least when using an IDE), and are usually easy to fix.
912The second class are the \emph{runtime} errors. Although they take a bit longer
913to surface, they usually manifest after some time in an illegal argument
914exception, null pointer exception or similar during the program execution.
915These kind of errors are a bit harder to handle, but at least they will show,
916eventually. Then there are the \emph{behavior-changing} errors. These errors are
917of the worst kind. They do not show up during compilation and they do not turn
918on a blinking red light during runtime either. The program can seem to work
919perfectly fine with them in play, but the business logic can be damaged in ways
920that will only show up over time.
921
922For discovering runtime errors and behavior changes when refactoring, it is
923essential to have good test coverage. Testing in this context means writing
924automated tests. Manual testing may have its uses, but when refactoring, it is
925automated unit testing that dominate. For discovering behavior changes it is
926especially important to have tests that cover potential problems, since these
927kind of errors does not reveal themselves.
928
929Unit testing is not a way to \emph{prove} that a program is correct, but it is a
3ab3e132 930way to make you confident that it \emph{probably} works as desired. In the
4928aa0b 931context of test-driven development (commonly known as TDD), the tests are even a
29f39f29
EK
932way to define how the program is \emph{supposed} to work. It is then, by
933definition, working if the tests are passing.
19c4f27d
EK
934
935If the test coverage for a code base is perfect, then it should, theoretically,
29f39f29
EK
936be risk-free to perform refactorings on it. This is why automated tests and
937refactoring are such a great match.
f65da046 938
b4d90424 939\subsubsection{Testing the code from correctness section}
29f39f29
EK
940The worst thing that can happen when refactoring is to introduce changes to the
941behavior of a program, as in the example on \myref{correctness}. This example
942may be trivial, but the essence is clear. The only problem with the example is
943that it is not clear how to create automated tests for it, without changing it
944in intrusive ways.
945
20bcc7bf 946Unit tests, as they are known from the different \glosspl{xUnit} around, are
29f39f29
EK
947only suitable to test the \emph{result} of isolated operations. They can not
948easily (if at all) observe the \emph{history} of a program.
b5d53f51 949
a13e5650 950This problem is still open.
116805bf 951
a13e5650 952\begin{comment}
116805bf
EK
953
954Assuming a sequential (non-concurrent) program:
955
956\begin{minted}{java}
957tracematch (C c, X x) {
958 sym m before:
959 call(* X.m(C)) && args(c) && cflow(within(C));
960 sym n before:
961 call(* X.n()) && target(x) && cflow(within(C));
962 sym setCx after:
963 set(C.x) && target(c) && !cflow(m);
964
965 m n
966
967 { assert x == c.x; }
968}
969\end{minted}
970
971%\begin{minted}{java}
972%tracematch (X x1, X x2) {
973% sym m before:
974% call(* X.m(C)) && target(x1);
975% sym n before:
976% call(* X.n()) && target(x2);
977% sym setX after:
978% set(C.x) && !cflow(m) && !cflow(n);
979%
980% m n
981%
982% { assert x1 != x2; }
983%}
984%\end{minted}
a13e5650 985\end{comment}
116805bf 986
222d172b 987
b4d90424 988\section{The Project}
222d172b 989
b4d90424 990\subsection{Project description}
60065669 991The aim of this master's project will be to explore the relationship between the
0fe1a611
EK
992\ExtractMethod and the \MoveMethod refactorings. This will be done by composing
993the two into a composite refactoring. The refactoring will be called the
6a779665
EK
994\ExtractAndMoveMethod refactoring.
995
996The two primitive \ExtractMethod and \MoveMethod refactorings must already be
997implemented in a tool, so the \ExtractAndMoveMethod refactoring is going to be
998built on top of those.
b5d53f51 999
a13e5650
EK
1000The composition of the \ExtractMethod and \MoveMethod refactorings springs
1001naturally out of the need to move procedures closer to the data they manipulate.
1002This composed refactoring is not well described in the literature, but it is
1003implemented in at least one tool called
fe0a4c48
EK
1004\name{CodeRush}\footnote{\url{https://help.devexpress.com/\#CodeRush/CustomDocument3519}},
1005that is an extension for \name{MS Visual
b5d53f51 1006Studio}\footnote{\url{http://www.visualstudio.com/}}. In CodeRush it is called
fe0a4c48 1007\refa{Extract Method to
b5d53f51 1008Type}\footnote{\url{https://help.devexpress.com/\#CodeRush/CustomDocument6710}},
0e6e57d3 1009but I choose to call it \ExtractAndMoveMethod, since I feel this better
b5d53f51
EK
1010communicates which primitive refactorings it is composed of.
1011
0fe1a611 1012The project will consist of implementing the \ExtractAndMoveMethod refactoring,
0e6e57d3 1013as well as executing it over a larger code base, as a case study. To be able to
0fe1a611 1014execute the refactoring automatically, I have to make it analyze code to
0e6e57d3 1015determine the best selections to extract into new methods.
70905ddc 1016
b4d90424 1017\subsection{The primitive refactorings}
021508ad
EK
1018The refactorings presented here are the primitive refactorings used in this
1019project. They are the abstract building blocks used by the \ExtractAndMoveMethod
1020refactoring.
04e21f15 1021
b4d90424 1022\paragraph{The Extract Method refactoring}
04e21f15
EK
1023The \refa{Extract Method} refactoring is used to extract a fragment of code
1024from its context and into a new method. A call to the new method is inlined
d516ac0b
EK
1025where the fragment was before. It is used to break code into logical units, with
1026names that explain their purpose.
1027
1028An example of an \ExtractMethod refactoring is shown in
1029\myref{lst:extractMethodRefactoring}. It shows a method containing calls to the
1030methods \method{foo} and \method{bar} of a type \type{X}. These statements are
1031then extracted into the new method \method{fooBar}.
d516ac0b 1032
021508ad 1033\begin{listing}[h]
d516ac0b 1034 \begin{multicols}{2}
53be7239 1035 \begin{minted}[samepage,frame=topline,label={Before},framesep=\mintedframesep]{java}
d516ac0b
EK
1036 class C {
1037 void method() {
1038 X x = new X();
1039 x.foo(); x.bar();
1040 }
1041 }
1042 \end{minted}
1043
1044 \columnbreak
1045
53be7239 1046 \begin{minted}[samepage,frame=topline,label={After},framesep=\mintedframesep]{java}
d516ac0b
EK
1047 class C {
1048 void method() {
1049 X x = new X();
1050 fooBar(x);
1051 }
d516ac0b
EK
1052 void fooBar(X x) {
1053 x.foo(); x.bar();
1054 }
1055 }
1056 \end{minted}
1057 \end{multicols}
1058 \caption{An example of an \ExtractMethod refactoring.}
1059 \label{lst:extractMethodRefactoring}
1060\end{listing}
04e21f15 1061
b4d90424 1062\paragraph{The Move Method refactoring}
04e21f15 1063The \refa{Move Method} refactoring is used to move a method from one class to
4306ef44
EK
1064another. This can be appropriate if the method is using more features of another
1065class than of the class which it is currently defined.
1066
1067\Myref{lst:moveMethodRefactoring} shows an example of this refactoring. Here a
1068method \method{fooBar} is moved from the class \type{C} to the class \type{X}.
1069
021508ad 1070\begin{listing}[h]
4306ef44 1071 \begin{multicols}{2}
53be7239 1072 \begin{minted}[samepage,frame=topline,label={Before},framesep=\mintedframesep]{java}
4306ef44
EK
1073 class C {
1074 void method() {
1075 X x = new X();
1076 fooBar(x);
1077 }
1078 void fooBar(X x) {
1079 x.foo(); x.bar();
1080 }
1081 }
1082
1083 class X {
1084 void foo(){/*...*/}
1085 void bar(){/*...*/}
1086 }
1087 \end{minted}
1088
1089 \columnbreak
1090
53be7239 1091 \begin{minted}[samepage,frame=topline,label={After},framesep=\mintedframesep]{java}
4306ef44
EK
1092 class C {
1093 void method() {
1094 X x = new X();
1095 x.fooBar();
1096 }
1097 }
1098
1099 class X {
1100 void fooBar() {
1101 foo(); bar();
1102 }
1103 void foo(){/*...*/}
1104 void bar(){/*...*/}
1105 }
1106 \end{minted}
1107 \end{multicols}
1108 \caption{An example of a \MoveMethod refactoring.}
1109 \label{lst:moveMethodRefactoring}
1110\end{listing}
04e21f15 1111
b4d90424 1112\subsection{The Extract and Move Method refactoring}
021508ad
EK
1113The \ExtractAndMoveMethod refactoring is a composite refactoring composed of the
1114primitive \ExtractMethod and \MoveMethod refactorings. The effect of this
1115refactoring on source code is the same as when extracting a method and moving it
e36eade0 1116to another class. Conceptually, this is done without an intermediate step. In
021508ad
EK
1117practice, as we shall see later, an intermediate step may be necessary.
1118
1119An example of this composite refactoring is shown in
1120\myref{lst:extractAndMoveMethodRefactoring}. The example joins the examples from
1121\cref{lst:extractMethodRefactoring} and \cref{lst:moveMethodRefactoring}. This
1122means that the selection consisting of the consecutive calls to the methods
1123\method{foo} and \method{bar}, is extracted into a new method \method{fooBar}
1124located in the class \type{X}.
1125
1126\begin{listing}[h]
1127 \begin{multicols}{2}
53be7239 1128 \begin{minted}[samepage,frame=topline,label={Before},framesep=\mintedframesep]{java}
021508ad
EK
1129 class C {
1130 void method() {
1131 X x = new X();
1132 x.foo(); x.bar();
1133 }
1134 }
1135
1136 class X {
1137 void foo(){/*...*/}
1138 void bar(){/*...*/}
1139 }
1140 \end{minted}
1141
1142 \columnbreak
1143
53be7239 1144 \begin{minted}[samepage,frame=topline,label={After},framesep=\mintedframesep]{java}
021508ad
EK
1145 class C {
1146 void method() {
1147 X x = new X();
1148 x.fooBar();
1149 }
1150 }
04e21f15 1151
021508ad
EK
1152 class X {
1153 void fooBar() {
1154 foo(); bar();
1155 }
1156 void foo(){/*...*/}
1157 void bar(){/*...*/}
1158 }
1159 \end{minted}
1160 \end{multicols}
1161 \caption{An example of the \ExtractAndMoveMethod refactoring.}
1162 \label{lst:extractAndMoveMethodRefactoring}
1163\end{listing}
04e21f15 1164
b4d90424 1165\subsection{Research questions}
0fe1a611
EK
1166The main question that I seek an answer to in this thesis is:
1167
1168\begin{quote}
1169 Is it possible to automate the analysis and execution of the
1170 \ExtractAndMoveMethod refactoring, and do so for all of the code of a larger
1171 project?
1172\end{quote}
1173
1174\noindent The secondary questions will then be:
1175
1176\paragraph{Can we do this efficiently?} Can we automate the analysis and
1177execution of the refactoring so it can be run in a reasonable amount of time?
1178And what does \emph{reasonable} mean in this context?
1179
1180And, assuming the refactoring does in fact improve the quality of source code:
1181
1182\paragraph{How can the automation of the refactoring be helpful?} What is the
e36eade0 1183usefulness of the refactoring in a software development setting? In what parts
0fe1a611
EK
1184of the development process can the refactoring play a role?
1185
b4d90424 1186\subsection{The premises}
0cbeeadb 1187\todoin{Appropriate name?}
b4d90424
EK
1188
1189\subsubsection{Choosing the target language}
70905ddc
EK
1190Choosing which programming language the code that shall be manipulated shall be
1191written in, is not a very difficult task. We choose to limit the possible
1192languages to the object-oriented programming languages, since most of the
1193terminology and literature regarding refactoring comes from the world of
1194object-oriented programming. In addition, the language must have existing tool
1195support for refactoring.
1196
fe0a4c48 1197The \name{Java} programming language\footnote{\url{https://www.java.com/}} is
70905ddc
EK
1198the dominating language when it comes to example code in the literature of
1199refactoring, and is thus a natural choice. Java is perhaps, currently the most
fe0a4c48 1200influential programming language in the world, with its \name{Java Virtual
70905ddc 1201Machine} that runs on all of the most popular architectures and also supports
e36eade0 1202dozens of other programming languages\footnote{They compile to Java bytecode.},
fe0a4c48 1203with \name{Scala}, \name{Clojure} and \name{Groovy} as the most prominent ones.
70905ddc
EK
1204Java is currently the language that every other programming language is compared
1205against. It is also the primary programming language for the author of this
1206thesis.
3f929fcc 1207
b4d90424 1208\subsubsection{Choosing the tools}
3ab3e132 1209When choosing a tool for manipulating Java, there are certain criteria that
3f929fcc
EK
1210have to be met. First of all, the tool should have some existing refactoring
1211support that this thesis can build upon. Secondly it should provide some kind of
1212framework for parsing and analyzing Java source code. Third, it should itself be
1213open source. This is both because of the need to be able to browse the code for
1214the existing refactorings that is contained in the tool, and also because open
1215source projects hold value in them selves. Another important aspect to consider
1216is that open source projects of a certain size, usually has large communities of
3ab3e132
EK
1217people connected to them, that are committed to answering questions regarding the
1218use and misuse of the products, that to a large degree is made by the community
3f929fcc
EK
1219itself.
1220
3ab3e132 1221There is a certain class of tools that meet these criteria, namely the class of
3f929fcc 1222\emph{IDEs}\footnote{\emph{Integrated Development Environment}}. These are
3ab3e132 1223programs that is meant to support the whole production cycle of a computer
3f929fcc
EK
1224program, and the most popular IDEs that support Java, generally have quite good
1225refactoring support.
1226
fe0a4c48
EK
1227The main contenders for this thesis is the \name{Eclipse IDE}, with the
1228\name{Java development tools} (JDT), the \name{IntelliJ IDEA Community Edition}
1229and the \name{NetBeans IDE} \see{toolSupport}. \name{Eclipse} and
1230\name{NetBeans} are both free, open source and community driven, while the
1231\name{IntelliJ IDEA} has an open sourced community edition that is free of
1232charge, but also offer an \name{Ultimate Edition} with an extended set of
1233features, at additional cost. All three IDEs supports adding plugins to extend
1234their functionality and tools that can be used to parse and analyze Java source
1235code. But one of the IDEs stand out as a favorite, and that is the \name{Eclipse
1236IDE}. This is the most popular\citing{javaReport2011} among them and seems to be
1237de facto standard IDE for Java development regardless of platform.
4e135659 1238
a5317dcf 1239
aceb848c 1240\chapter{The search-based Extract and Move Method refactoring}
9503a520
EK
1241In this chapter I will delve into the workings of the search-based
1242\ExtractAndMoveMethod refactoring. We will see the choices it must make along
1243the way and why it chooses a text selection as a candidate for refactoring or
1244not.
1245
e36eade0 1246After defining some concepts, I will introduce an example that will be used
9503a520
EK
1247throughout the chapter to illustrate how the refactoring works in some simple
1248situations.
c894b297
EK
1249
1250\section{The inputs to the refactoring}
1251For executing an \ExtractAndMoveMethod refactoring, there are two simple
f5355077 1252requirements. The first thing the refactoring needs is a text selection, telling
c894b297
EK
1253it what to extract. Its second requirement is a target for the subsequent move
1254operation.
1255
1256The extracted method must be called instead of the selection that makes up its
1257body. Also, the method call has to be performed via a variable, since the method
aceb848c
EK
1258is not static. Therefore, the move target must be a variable in the scope of the
1259extracted selection. The actual new location for the extracted method will be
1260the class representing the type of the move target variable. But, since the
1261method also must be called through a variable, it makes sense to define the move
1262target to be either a local variable or a field in the scope of the text
1263selection.
c894b297 1264
53be7239
EK
1265\section{Defining a text selection}
1266A text selection, in our context, is very similar to what you think of when
1267selecting a bit of text in your editor or other text processing tool with your
1268mouse or keyboard. It is an abstract construct that is meant to capture which
1269specific portion of text we are about to deal with.
1270
1271To be able to clearly reason about a text selection done to a portion of text in
1272a computer file, that consist of pure text, we put up the following definition.
1273
1274\definition{A \emph{text selection} in a text file is defined by two
1275non-negative integers, in addition to a reference to the file itself. The first
1276integer is an offset into the file, while the second reference is the length of
1277the text selection.}
1278
1279This means that the selected text consist of a number of characters equal to the
1280length of the selection, where the first character is found at the specified
1281offset.
1282
b4d90424
EK
1283\section{Where we look for text selections}
1284
1285\subsection{Text selections are found in methods}
1286The text selections we are interested in are those that surrounds program
1287statements. Therefore, the place we look for selections that can form candidates
1288for an execution of the \ExtractAndMoveMethod refactoring, is within the body of
1289a single method.
1290
1291\paragraph{On ignoring static methods}
1292In this project we are not analyzing static methods for candidates to the
1293\ExtractAndMoveMethod refactoring. The reason for this is that in the cases
1294where we want to perform the refactoring for a selection within a static method,
1295the first step is to extract the selection into a new method. Hence this method
1296also become static, since it must be possible to call it from a static context.
1297It would then be difficult to move the method to another class, make it
1298non-static and calling it through a variable. To avoid these obstacles, we
1299simply ignore static methods.
1300
53be7239
EK
1301\begin{listing}[htb]
1302\def\charwidth{5.8pt}
9db7f4cf 1303\def\indent{2*\charwidth}
53be7239
EK
1304\def\lineheight{\baselineskip}
1305\def\mintedtop{2*\lineheight+5.8pt}
1306
1307\begin{tikzpicture}[overlay, yscale=-1, xshift=3.8pt+\charwidth*31]
1308 \tikzstyle{overlaybox}=[fill=lightgray,opacity=0.2]
1309 % Level 1
1310 \draw[overlaybox] (\indent,\mintedtop+\lineheight*4) rectangle
9db7f4cf 1311 +(23*\charwidth,17*\lineheight);
53be7239
EK
1312
1313 % Level 2
1314 \draw[overlaybox] (2*\indent,\mintedtop+5*\lineheight) rectangle
9db7f4cf 1315 +(15*\charwidth,3*\lineheight);
53be7239 1316 \draw[overlaybox] (2*\indent,\mintedtop+15*\lineheight) rectangle
9db7f4cf 1317 +(15*\charwidth,3*\lineheight);
53be7239 1318 \draw[overlaybox] (2*\indent,\mintedtop+19*\lineheight) rectangle
9db7f4cf 1319 +(15*\charwidth,\lineheight);
53be7239
EK
1320\end{tikzpicture}
1321 \begin{multicols}{2}
01d46361 1322 \begin{minted}[linenos,frame=topline,label=Clean,framesep=\mintedframesep]{java}
53be7239 1323class C {
9db7f4cf
EK
1324 A a; B b; boolean bool;
1325
1326 void method(int val) {
1327 if (bool) {
1328 a.foo();
1329 a = new A();
1330 a.bar();
1331 }
1332
1333 a.foo();
1334 a.bar();
1335
1336 switch (val) {
1337 case 1:
1338 b.a.foo();
1339 b.a.bar();
1340 break;
1341 default:
1342 a.foo();
53be7239 1343 }
9db7f4cf 1344 }
53be7239
EK
1345}
1346\end{minted}
1347
1348\columnbreak
1349
9db7f4cf
EK
1350\begin{minted}[frame=topline,label={With statement
1351 sequences},framesep=\mintedframesep]{java}
53be7239 1352class C {
9db7f4cf
EK
1353 A a; B b; boolean bool;
1354
1355 void method(int val) {
1356 if (bool) {
1357 a.foo();
1358 a = new A();
1359 a.bar();
53be7239 1360 }
9db7f4cf
EK
1361
1362 a.foo();
1363 a.bar();
1364
1365 switch (val) {
1366 case 1:
1367 b.a.foo();
1368 b.a.bar();
1369 break;
1370 default:
1371 a.foo();
1372 }
1373 }
53be7239
EK
1374}
1375\end{minted}
1376
1377 \end{multicols}
1378\caption{Classes \type{A} and \type{B} are both public. The methods
1379\method{foo} and \method{bar} are public members of class \type{A}.}
1380\label{lst:grandExample}
1381\end{listing}
1382
b4d90424 1383\subsection{The possible text selections of a method body}
0cbeeadb 1384\todoin{dummy todo}
b4d90424
EK
1385The number of possible text selections that can be made from the text in a
1386method body, are equal to all the sub-sequences of characters within it. For our
f5355077
EK
1387purposes, analyzing program source code, we must define what it means for a text
1388selection to be valid.
53be7239
EK
1389
1390\definition{A \emph{valid text selection} is a text selection that contains all
1391of one or more consecutive program statements.}
1392
1393For a sequence of statements, the text selections that can be made from it, are
1394equal to all its sub-sequences. \Myref{lst:textSelectionsExample} show an
1395example of all the text selections that can be made from the code in
e36eade0 1396\myref{lst:grandExample}, lines 16-18. For convenience and the clarity of this
01d46361
EK
1397example, the text selections are represented as tuples with the start and end
1398line of all selections: $\{(16), (17), (18), (16,17), (16,18), (17,18)\}$.
53be7239
EK
1399
1400\begin{listing}[htb]
1401\def\charwidth{5.7pt}
1402\def\indent{4*\charwidth}
1403\def\lineheight{\baselineskip}
01d46361 1404\def\mintedtop{\lineheight-1pt}
53be7239
EK
1405
1406\begin{tikzpicture}[overlay, yscale=-1]
1407 \tikzstyle{overlaybox}=[fill=lightgray,opacity=0.2]
1408
1409 % First statement
1410 \draw[overlaybox] (2*\charwidth,\mintedtop) rectangle
1411 +(16*\charwidth,\lineheight);
1412
1413 % Second statement
1414 \draw[overlaybox] (2*\charwidth,\mintedtop+\lineheight) rectangle
1415 +(16*\charwidth,\lineheight);
1416
1417 % Third statement
1418 \draw[overlaybox] (2*\charwidth,\mintedtop+2*\lineheight) rectangle
1419 +(16*\charwidth,\lineheight);
1420
1421 \draw[overlaybox] (\indent-3*\charwidth,\mintedtop) rectangle
1422 +(18*\charwidth,2*\lineheight);
1423
1424 \draw[overlaybox] (3*\charwidth,\mintedtop+\lineheight) rectangle
1425 +(14*\charwidth,2*\lineheight);
1426
1427 % All
1428 \draw[overlaybox] (\indent,\mintedtop) rectangle
1429 +(12*\charwidth,3*\lineheight);
1430\end{tikzpicture}
1431% indent should be 5 spaces
9503a520 1432\begin{minted}[linenos,firstnumber=16]{java}
53be7239
EK
1433 b.a.foo();
1434 b.a.bar();
1435 break;
1436\end{minted}
1437\caption{Example of how the text selections generator would generate text
1438 selections based on a lists of statements. Each highlighted rectangle
1439represents a text selection.}
1440\label{lst:textSelectionsExample}
1441\end{listing}
1442
1443Each nesting level of a method body can have many such sequences of statements.
9503a520 1444The outermost nesting level has one such sequence, and each branch contains
01d46361
EK
1445their own sequence of statements. \Myref{lst:grandExample} has a version of some
1446code where all such sequences of statements are highlighted for a method body.
47c0bea8 1447
9503a520
EK
1448To complete our example of possible text selections, I will now list all
1449possible text selections for the method in \myref{lst:grandExample}, by nesting
f5355077 1450level. There are 23 of them in total.
47c0bea8 1451
9503a520
EK
1452\begin{description}
1453 \item[Level 1 (10 selections)] \hfill \\
1454 $\{(5,9), (11), (12), (14,21), (5,11), (5,12), (5,21), (11,12),
1455 (11,21), \\(12,21)\}$
8de7cf3c 1456
9503a520
EK
1457 \item[Level 2 (13 selections)] \hfill \\
1458 $\{(6), (7), (8), (6,7), (6,8), (7,8), (16), (17), (18), (16,17), (16,18), \\
1459 (17,18), (20)\}$
1460\end{description}
c894b297 1461
b4d90424
EK
1462\subsubsection{The complexity}\label{sec:complexity}
1463The complexity of how many text selections that needs to be analyzed for a body
1464of in total $n$ statements, is bounded by $O(n^2)$. A body of statements is here
1465all the statements in all nesting levels of a sequence of statements. A method
1466body (or a block) is a body of statements. To prove that the complexity is
1467bounded by $O(n^2)$, I present a couple of theorems and proves them.
1468
1469\begin{theorem}
1470The number of text selections that need to be analyzed for each list of
1471statements of length $n$, is exactly
1472
1473\begin{equation*}
1474 \sum_{i=1}^{n} i = \frac{n(n+1)}{2}
1475 \label{eq:complexityStatementList}
1476\end{equation*}
1477\label{thm:numberOfTextSelection}
1478\end{theorem}
1479
1480\begin{proof}
1481 For $n=1$ this is trivial: $\frac{1(1+1)}{2} = \frac{2}{2} = 1$. One statement
1482 equals one selection.
1483
1484 For $n=2$, you get one text selection for the first statement, one selection
1485 for the second statement, and one selection for the two of them combined.
1486 This equals three selections. $\frac{2(2+1)}{2} = \frac{6}{2} = 3$.
1487
1488 For $n=3$, you get 3 selections for the two first statements, as in the case
1489 where $n=2$. In addition you get one selection for the third statement itself,
1490 and two more statements for the combinations of it with the two previous
1491 statements. This equals six selections. $\frac{3(3+1)}{2} = \frac{12}{2} = 6$.
1492
1493 Assume that for $n=k$ there exists $\frac{k(k+1)}{2}$ text selections. Then we
1494 want to add selections for another statement, following the previous $k$
1495 statements. So, for $n=k+1$, we get one additional selection for the statement
1496 itself. Then we get one selection for each pair of the new selection and the
1497 previous $k$ statements. So the total number of selections will be the number
1498 of already generated selections, plus $k$ for every pair, plus one for the
1499 statement itself: $\frac{k(k+1)}{2} + k +
1500 1 = \frac{k(k+1)+2k+2}{2} = \frac{k(k+1)+2(k+1)}{2} = \frac{(k+1)(k+2)}{2} =
1501 \frac{(k+1)((k+1)+1)}{2} = \sum_{i=1}^{k+1} i$
1502\end{proof}
1503
1504%\definition{A \emph{body of statements} is a sequence of statements where every
1505%statement may have sub-statements.}
1506
1507\begin{theorem}
1508 The number of text selections for a body of statements is maximized if all the
1509 statements are at the same level.
1510 \label{thm:textSelectionsMaximized}
1511\end{theorem}
1512
1513\begin{proof}
1514 Assume we have a body of, in total, $k$ statements. Then, the sum of the
1515 lengths of all the lists of statements in the body, is also $k$. Let
1516 $\{l,\ldots,m,(k-l-\ldots-m)\}$ be the lengths of the lists of statements in
1517 the body, with $l+\ldots+m<k \Rightarrow \forall i \in \{l,\ldots,m\} : i < k$.
1518
1519 Then, the number of text selections that are generated for the $k$ statements
1520 is
1521
1522 {
1523 \small
1524 \begin{align*}
1525 \frac{l(l+1)}{2} + \ldots + \frac{m(m+1)}{2} +
1526 \frac{(k-l-\ldots-m)((k-l-\ldots-m)+ 1)}{2} = \\
1527 \frac{l^2+l}{2} + \ldots + \frac{m^2+m}{2} + \frac{k^2 - 2kl - \ldots - 2km +
1528 l^2 + \ldots + m^2 + k - l - \ldots - m}{2} = \\
1529 \frac{2l^2 - 2kl + \ldots + 2m^2 - 2km + k^2 + k}{2}
1530 \end{align*}
1531 }
1532
1533 \noindent It then remains to show that this inequality holds:
1534
1535 \begin{align*}
1536 \frac{2l^2 - 2kl + \ldots + 2m^2 - 2km + k^2 + k}{2} < \frac{k(k+1)}{2} =
1537 \frac{k^2 + k}{2}
1538 \end{align*}
1539
1540 \noindent By multiplication by $2$ on both sides, and by removing the equal
1541 parts, we get
1542
1543 \begin{align*}
1544 2l^2 - 2kl + \ldots + 2m^2 - 2km < 0
1545 \end{align*}
1546
1547 Since $\forall i \in \{l,\ldots,m\} : i < k$, we have that $\forall i \in
1548 \{l,\ldots,m\} : 2ki > 2i^2$, so all the pairs of parts on the form $2i^2-2ki$
1549 are negative. In sum, the inequality holds.
1550
1551\end{proof}
1552
1553Therefore, the complexity for the number of selections that needs to be analyzed
1554for a body of $n$ statements is $O\bigl(\frac{n(n+1)}{2}\bigr) = O(n^2)$.
1555
0cc6a67d 1556\section{Disqualifying a selection}
1c521a77
EK
1557Certain text selections would lead to broken code if used as input to the
1558\ExtractAndMoveMethod refactoring. To avoid this, we have to check all text
f5355077
EK
1559selections for such conditions before they are further analyzed. This section
1560is therefore going to present some properties that make a selection unsuitable
15327961 1561for our refactoring.
1c521a77
EK
1562
1563\subsection{A call to a protected or package-private method}
21506154
EK
1564If a text selection contains a call to a protected or package-private method, it
1565would not be safe to move it to another class. The reason for this, is that we
1566cannot know if the called method is being overridden by some subclass of the
1567\gloss{enclosingClass}, or not.
1c521a77
EK
1568
1569Imagine that the protected method \method{foo} is declared in class \m{A},
1570and overridden in class \m{B}. The method \method{foo} is called from within a
1571selection done to a method in \m{A}. We want to extract and move this selection
1572to another class. The method \method{foo} is not public, so the \MoveMethod
1573refactoring must make it public, making the extracted method able to call it
f5355077 1574from the extracted method's new location. The problem is, that the now public
1c521a77
EK
1575method \method{foo} is overridden in a subclass, where it has a protected
1576status. This makes the compiler complain that the subclass \m{B} is trying to
1577reduce the visibility of a method declared in its superclass \m{A}. This is not
21506154 1578allowed in Java, and for good reasons. It would make it possible to make a
1c521a77 1579subclass that could not be a substitute for its superclass.
1c521a77 1580
21506154
EK
1581The problem this check helps to avoid, is a little subtle. The problem does not
1582arise in the class where the change is done, but in a class derived from it.
1583This shows that classes acting as superclasses are especially fragile to
b4d90424
EK
1584introducing errors in the context of automated refactoring.
1585\begin{comment}
1586This is also shown in bug\ldots \todoin{File Eclipse bug report}
1587\end{comment}
21506154 1588
1fceb439
EK
1589\subsection{A double class instance creation}
1590The following is a problem caused solely by the underlying \MoveMethod
1591refactoring. The problem occurs if two classes are instantiated such that the
1592first constructor invocation is an argument to a second, and that the first
1593constructor invocation takes an argument that is built up using a field. As an
1594example, say that \var{name} is a field of the enclosing class, and we have the
1595expression \code{new A(new B(name))}. If this expression is located in a
1596selection that is moved to another class, \var{name} will be left untouched,
1597instead of being prefixed with a variable of the same type as it is declared in.
1598If \var{name} is the destination for the move, it is not replaced by
1599\code{this}, or removed if it is a prefix to a member access
1600(\code{name.member}), but it is still left by itself.
1601
1602Situations like this would lead to code that will not compile. Therefore, we
1603have to avoid them by not allowing selections to contain such double class
1604instance creations that also contains references to fields.
b4d90424 1605\begin{comment}
1fceb439 1606\todoin{File Eclipse bug report}
b4d90424 1607\end{comment}
1fceb439 1608
1c521a77
EK
1609\subsection{Instantiation of non-static inner class}
1610When a non-static inner class is instantiated, this must happen in the scope of
1611its declaring class. This is because it must have access to the members of the
1612declaring class. If the inner class is public, it is possible to instantiate it
1613through an instance of its declaring class, but this is not handled by the
1614underlying \MoveMethod refactoring.
1615
1616Performing a move on a method that instantiates a non-static inner class, will
1617break the code if the instantiation is not handled properly. For this reason,
1618selections that contains instantiations of non-static inner classes are deemed
1619unsuitable for the \ExtractAndMoveMethod refactoring.
1620
1621\subsection{References to enclosing instances of the enclosing class}
1622The title of this section may be a little hard to grasp at first. What it means
1623is that there is a (non-static) class \m{C} that is declared in the scope of
98b06ec2
EK
1624possibly multiple other classes. And there is a statement in the body of a
1625method declared in class \m{C}, that contains a reference to one or more
1626instances of these enclosing classes of \m{C}.
1c521a77
EK
1627
1628The problem with this, is that these references may not be valid if they are
1629moved to another class. Theoretically, some situations could easily be solved by
1630passing, to the moved method, a reference to the instance where the problematic
98b06ec2
EK
1631referenced member is declared. This should work in the case where this member is
1632publicly accessible. This is not done in the underlying \MoveMethod refactoring,
1633so it cannot be allowed in the \ExtractAndMoveMethod refactoring either.
1c521a77 1634
15327961 1635\subsection{Inconsistent return statements}
cb903a13
EK
1636To verify that a text selection is consistent with respect to return statements,
1637we must check that if a selection contains a return statement, then every
1638possible execution path within the selection ends in either a return or a throw
15327961
EK
1639statement. This property is important regarding the \ExtractMethod refactoring.
1640If it holds, it means that a method could be extracted from the selection, and a
1641call to it could be substituted for the selection. If the method has a non-void
1642return type, then a call to it would also be a valid return point for the
1643calling method. If its return value is of the void type, then the \ExtractMethod
1644refactoring will append an empty return statement to the back of the method
1645call. Therefore, the analysis does not discriminate on either kinds of return
1646statements, with or without a return value.
1647
cb903a13
EK
1648A throw statement is accepted anywhere a return statement is required. This is
1649because a throw statement causes an immediate exit from the current block,
1650together with all outer blocks in its control flow that does not catch the
1651thrown exception.
15327961
EK
1652
1653Return statements can be either explicit or implicit. An \emph{explicit} return
1654statement is formed by using the \code{return} keyword, while an \emph{implicit}
d59e3ab7
EK
1655return statement is a statement that is not formed using \code{return}, but must
1656be the last statement of a method that can have any side effects. This can
1657happen in methods with a void return type. An example is a statement that is
15327961 1658inside one or more blocks. The last statement of a method could for instance be
cb903a13
EK
1659a synchronized statement, but the last statement that is executed in the method,
1660and that can have any side effects, may be located inside the body of the
1661synchronized statement.
15327961
EK
1662
1663We can start the check for this property by looking at the last statement of a
d59e3ab7
EK
1664selection to see if it is a return statement (explicit or implicit) or a throw
1665statement. If this is the case, then the property holds, assuming the selected
1666code does not contain any compilation errors. All execution paths within the
1667selection should end in either this, or another, return or throw statement.
15327961
EK
1668\todoin{State somewhere that we assume no compilation errors?}
1669
1670If the last statement of the selection is not a return or throw, the execution
d59e3ab7
EK
1671of it must eventually end in one for the selection to be legal. This means that
1672all branches of the last statement of every branch must end in a return or
1673throw. Given this recursive definition, there are only five types of statements
1674that are guaranteed to end in a return or throw if their child branches does.
1675All other statements would have to be considered illegal. The first three:
1676Block-statements, labeled statements and do-statements are all kinds of
1677fall-through statements that always gets their body executed. Do-statements
1678would not make much sense if written such that they
15327961
EK
1679always ends after the first round of execution of their body, but that is not
1680our concern. The remaining two statements that can end in a return or throw are
1681if-statements and try-statements.
1682
cb903a13 1683For an if-statement, the rule is that if its then-part does not contain any
d59e3ab7 1684return or throw statements, this is considered illegal. If the then-part does
cb903a13
EK
1685contain a return or throw, the else-part is checked. If its else-part is
1686non-existent, or it does not contain any return or throw statements, the
1687statement is considered illegal. If an if-statement is not considered illegal,
d59e3ab7 1688the bodies of its two parts must be checked.
15327961 1689
cb903a13
EK
1690Try-statements are handled much the same way as if-statements. The body of a
1691try-statement must contain a return or throw. The same applies to its catch
1692clauses and finally body.
1c521a77 1693
11d9e6e1
EK
1694\subsection{Ambiguous return values}
1695The problem with ambiguous return values arise when a selection is chosen to be
1696extracted into a new method, but it needs to return more than one value from
1697that method.
1698
1699This problem occurs in two situations. The first situation arise when there is
1700more than one local variable that is both assigned to within a selection and
1701also referenced after the selection. The other situation occur when there is
1702only one such assignment, but the selection also contain return statements.
1703
1704Therefore we must examine the selection for assignments to local variables that
1705are referenced after the text selection. Then we must verify that not more than
1706one such reference is done, or zero if any return statements are found.
1707
83f12332
EK
1708\subsection{Illegal statements}
1709An illegal statement may be a statement that is of a type that is never allowed,
8fa89d14
EK
1710or it may be a statement of a type that is only allowed if certain conditions
1711are true.
83f12332
EK
1712
1713Any use of the \var{super} keyword is prohibited, since its meaning is altered
1714when moving a method to another class.
1715
1716For a \emph{break} statement, there are two situations to consider: A break
1717statement with or without a label. If the break statement has a label, it is
1718checked that whole of the labeled statement is inside the selection. If the
1719break statement does not have a label attached to it, it is checked that its
1720innermost enclosing loop or switch statement also is inside the selection.
1721
1722The situation for a \emph{continue} statement is the same as for a break
1723statement, except that it is not allowed inside switch statements.
1724
1725Regarding \emph{assignments}, two types of assignments are allowed: Assignments
1726to non-final variables and assignments to array access. All other assignments
1727are regarded illegal.
1728
1729\todoin{Expand with more illegal statements and/or conclude that I did not have
9503a520
EK
1730time to analyze all statement types.}
1731
01d46361
EK
1732\section{Disqualifying selections from the
1733example}\label{sec:disqualifyingExample}
9503a520
EK
1734Among the selections we found for the code in \myref{lst:grandExample}, not many
1735of them must be disqualified on the basis of containing something illegal. The
1736only statement causing trouble is the break statement in line 18. None of the
1737selections on nesting level 2 can contain this break statement, since the
1738innermost switch statement is not inside any of these selections.
83f12332 1739
9503a520
EK
1740This means that the text selections $(18)$, $(16,18)$ and $(17,18)$ can be
1741excluded from further consideration, and we are left with the following
1742selections.
81cf9554 1743
9503a520
EK
1744\begin{description}
1745 \item[Level 1 (10 selections)] \hfill \\
1746 $\{(5,9), (11), (12), (14,21), (5,11), (5,12), (5,21), (11,12),
1747 (11,21), \\(12,21)\}$
1748
1749 \item[Level 2 (10 selections)] \hfill \\
1750 $\{(6), (7), (8), (6,7), (6,8), (7,8), (16), (17), (16,17), (20)\}$
1751\end{description}
1752
1753\section{Finding a move target}
1754In the analysis needed to perform the \ExtractAndMoveMethod refactoring
1755automatically, the selection we choose is found among all the selections that
1756has a possible move target. Therefore, the best possible move target must be
1757found for all the candidate selections, so that we are able to sort out the
1758selection that is best suited for the refactoring.
1759
1760To find the best move target for a specific text selection, we first need to
1761find all the possible targets. Since the target must be a local variable or a
1762field, we are basically looking for names within the selection; names that
1763represents references to variables.
1764
1765The names we are looking for, we call prefixes. This is because we are not
1766interested in names that occur in the middle of a dot-separated sequence of
1767names. We are only interested in names that constitutes prefixes of other names,
1768possibly themselves. The reason for this, is that two lexically equal names need
1769not be referencing the same variable, if they themselves are not referenced via
1770the same prefix. Consider the two method calls \code{a.x.foo()} and
1771\code{b.x.foo()}. Here, the two references to \code{x}, in the middle of the
1772qualified names both preceding \code{foo()}, are not referencing the same
1773variable. Even though the variables may share the type, and the method
1774\method{foo} thus is the same for both, we would not know through which of the
1775variables \var{a} or \var{b} we should call the extracted method.
1776
1777The possible move targets are then the prefixes that are not among a subset of
1778the prefixes that are not valid move targets \see{s:unfixes}. Also, prefixes
1779that are just simple names, and have only one occurrence, are left out. This is
1780because they are not going to have any positive effect on coupling between
01d46361 1781classes, and are only going to increase the complexity of the code.
9503a520
EK
1782
1783For finding the best move target among these safe prefixes, a simple heuristic
1784is used. It is as simple as choosing the prefix that is most frequently
1785referenced within the selection.
1786
1787\section{Unfixes}\label{s:unfixes}
1788The prefixes that are not valid as move targets are called unfixes.
1789
1790An unfix can be a name that is assigned to within a selection. The reason that
1791this cannot be allowed, is that the result would be an assignment to the
1792\type{this} keyword, which is not valid in Java \see{eclipse_bug_420726}.
1793
1794Prefixes that originates from variable declarations within the same selection
1795are also considered unfixes. This is because when a method is moved, it needs to
1796be called through a variable. If this variable is also declared within the
1797method that is to be moved, this obviously cannot be done.
1798
1799Also considered as unfixes are variable references that are of types that are
1800not suitable for moving methods to. This can either be because it is not
1801physically possible to move a method to the desired class or that it will cause
1802compilation errors by doing so.
1803
1804If the type binding for a name is not resolved it is considered and unfix. The
1805same applies to types that is only found in compiled code, so they have no
1806underlying source that is accessible to us. (E.g. the \type{java.lang.String}
1807class.)
1808
1809Interfaces types are not suitable as targets. This is simply because interfaces
1810in Java cannot contain methods with bodies. (This thesis does not deal with
1811features of Java versions later than Java 7. Java 8 has interfaces with default
1812implementations of methods.)
1813
1814Neither are local types allowed. This accounts for both local and anonymous
1815classes. Anonymous classes are effectively the same as interface types with
1816respect to unfixes. Local classes could in theory be used as targets, but this
1817is not possible due to limitations of the way the \refa{Extract and Move Method}
1818refactoring has to be implemented. The problem is that the refactoring is done
1819in two steps, so the intermediate state between the two refactorings would not
1820be legal Java code. In the intermediate step for the case where a local class is
1821the move target, the extracted method would need to take the local class as a
1822parameter. This new method would need to live in the scope of the declaring
1823class of the originating method. The local class would then not be in the scope
1824of the extracted method, thus bringing the source code into an illegal state.
1825One could imagine that the method was extracted and moved in one operation,
1826without an intermediate state. Then it would make sense to include variables
1827with types of local classes in the set of legal targets, since the local classes
1828would then be in the scopes of the method calls. If this makes any difference
1829for software metrics that measure coupling would be a different discussion.
1830
1831
1832\begin{listing}[htb]
1833\begin{multicols}{2}
1834\begin{minted}[frame=topline,label=Before,framesep=\mintedframesep]{java}
1835void declaresLocalClass() {
1836 class LocalClass {
1837 void foo() {}
1838 void bar() {}
1839 }
1840
1841 LocalClass inst =
1842 new LocalClass();
1843 inst.foo();
1844 inst.bar();
1845}
1846\end{minted}
1847
1848\columnbreak
1849
1850\begin{minted}[frame=topline,label={After Extract
1851 Method},framesep=\mintedframesep]{java}
1852void declaresLocalClass() {
1853 class LocalClass {
1854 void foo() {}
1855 void bar() {}
1856 }
1857
1858 LocalClass inst =
1859 new LocalClass();
1860 fooBar(inst);
1861}
81cf9554 1862
9503a520
EK
1863// Intermediate step
1864void fooBar(LocalClass inst) {
1865 inst.foo();
1866 inst.bar();
1867}
1868\end{minted}
1869\end{multicols}
1870\caption{When the \refa{Extract and Move Method} tries to use a variable with a
1871local type as the move target, an intermediate step is performed that is not
1872allowed. Here: \type{LocalClass} is not in the scope of \method{fooBar} in its
1873intermediate location.}
1874\label{lst:extractMethod_LocalClass}
1875\end{listing}
83f12332 1876
9503a520 1877The last class of names that are considered unfixes are names used in null
01d46361
EK
1878tests. These are tests that reads like this: if \code{<name>} equals \var{null}
1879then do something. If allowing variables used in those kinds of expressions as
1880targets for moving methods, we would end up with code containing boolean
1881expressions like \code{this == null}, which would not be meaningful, since
1882\var{this} would never be \var{null}.
1883
4ea21603 1884\section{Finding the example selections that have possible targets}
01d46361
EK
1885We now pick up the thread from \myref{sec:disqualifyingExample} where we have a
1886set of text selections that needs to be analyzed to find out if some of them are
1887suitable targets for the \ExtractAndMoveMethod refactoring.
1888
1889We start by analyzing the text selections for nesting level 2, because these
1890results can be used to reason about the selections for nesting level 1. First we
1891have all the single-statement selections.
1892
1893\begin{description}
1894 \item[Selections $(6)$, $(8)$ and $(20)$.] \hfill \\
1895 All these selections have a prefix that contains a possible target, namely
e36eade0
EK
1896 the variable \var{a}. The problem is that the prefixes are only one segment
1897 long, and their frequency counts are only 1 as well. None of these
01d46361
EK
1898 selections are therefore considered as suitable candidates for the
1899 refactoring.
1900
1901 \item[Selection $(7)$.] \hfill \\
1902 This selection contains the unfix \var{a}, and no other possible targets.
1903 The reason for \var{a} being an unfix is that it is assigned to within the
1904 selection. Selection $(7)$ is therefore unsuited as a refactoring candidate.
1905
1906 \item[Selections $(16)$ and $(17)$.] \hfill \\
1907 These selections both have a possible target. The target for both selections
1908 is the variable \var{b}. Both the prefixes have frequency 1. We denote this
1909 with the new tuples $((16), \texttt{b.a}, f(1))$ and $((17), \texttt{b.a},
1910 f(1))$. They contain the selection, the prefix with the target and the
e36eade0 1911 frequency for this prefix.
01d46361
EK
1912
1913\end{description}
1914
1915Then we have all the text selections from level 2 that are composed of multiple
1916statements:
1917
1918\begin{description}
1919 \item[Selections $(6,7)$, $(6,8)$ and $(7,8)$.] \hfill \\
1920 All these selections are disqualified for the reason that they contain the
1921 unfix \var{a}, due to the assignment, and no other possible move targets.
1922
1923 \item[Selection $(16,17)$.] \hfill \\
e36eade0
EK
1924 This selection is the last selection that is not analyzed on nesting level
1925 2. It contains only one possible move target, and that is the variable
1926 \var{b}. It also contains only one prefix \var{b.a}, with frequency count
1927 2. Therefore we have a new candidate $((16,17), \texttt{b.a}, f(2))$.
01d46361
EK
1928
1929\end{description}
1930
1931Moving on to the text selections for nesting level 1, starting with the
1932single-statement selections:
1933
1934\begin{description}
1935 \item[Selection $(5,9)$.] \hfill \\
e36eade0 1936 This selection contains two variable references that must be analyzed to see
01d46361
EK
1937 if they are possible move candidates. The first one is the variable
1938 \var{bool}. This variable is of type \type{boolean}, that is a primary type
1939 and therefore not possible to make any changes to. The second variable is
1940 \var{a}. The variable \var{a} is an unfix in $(5,9)$, for the same reason as
1941 in the selections $(6,7)$, $(7,8)$ and $(6,8)$. So selection $(5,9)$
1942 contains no possible move targets.
1943
1944 \item[Selections $(11)$ and $(12)$.] \hfill \\
1945 These selections are disqualified for the same reasons as selections $(6)$
1946 and $(8)$. Their prefixes are one segment long and are referenced only one
1947 time.
1948
1949 \item[Selection $(14,21)$] \hfill \\
1950 This is the switch statement from \myref{lst:grandExample}. It contains the
1951 relevant variable references \var{val}, \var{a} and \var{b}. The variable
1952 \var{val} is a primary type, just as \var{bool}. The variable \var{a} is
1953 only found in one statement, and in a prefix with only one segment, so it is
1954 not considered to be a possible move target. The only variable left is
1955 \var{b}. Just as in the selection $(16,17)$, \var{b} is part of the prefix
e36eade0 1956 \code{b.a}, that has 2 appearances. We have found a new candidate $((14,21),
01d46361
EK
1957 \texttt{b.a}, f(2))$.
1958
1959\end{description}
1960
e36eade0 1961It remains to see if we get any new candidates by analyzing the multi-statement
01d46361 1962text selections for nesting level 1:
9503a520 1963
01d46361
EK
1964\begin{description}
1965 \item[Selections $(5,11)$ and $(5,12)$.] \hfill \\
e36eade0 1966 These selections are disqualified for the same reason as $(5,9)$. The only
01d46361
EK
1967 possible move target \var{a} is an unfix.
1968
1969 \item[Selection $(5,21)$.] \hfill \\
1970 This is whole of the method body in \myref{lst:grandExample}. The variables
1971 \var{a}, \var{bool} and \var{val} are either an unfix or primary types. The
1972 variable \var{b} is the only possible move target, and we have, again, the
1973 prefix \code{b.a} as the center of attention. The refactoring candidate is
1974 $((5,21), \texttt{b.a}, f(2))$.
1975
1976 \item[Selection $(11,12)$.] \hfill \\
1977 This small selection contains the prefix \code{a} with frequency 2, and no
1978 unfixes. The candidate is $((11,12), \texttt{a}, f(2))$.
1979
1980 \item[Selection $(11,21)$] \hfill \\
1981 This selection contains the selection $(11,12)$ in addition to the switch
1982 statement. The selection has two possible move targets. The first one is
e36eade0 1983 \var{b}, in a prefix with frequency 2. The second is \var{a}, although it
01d46361
EK
1984 is in a simple prefix, it is referenced 3 times, and is therefore chosen
1985 as the target for the selection. The new candidate is $((11,21),
1986 \texttt{a}, f(3))$.
1987
1988 \item[Selection $(12,21)$.] \hfill \\
1989 This selection is similar to the previous $(11,21)$, only that \var{a} now
e36eade0 1990 has frequency count 2. This means that the prefixes \code{a} and
01d46361
EK
1991 \code{b.a} both have the count 2. Of the two, \code{b.a} is preferred,
1992 since it has more segments than \code{a}. Thus the candidate for this
1993 selection is $((12,21), \texttt{b.a}, f(2))$.
1994
1995\end{description}
1996
1997For the method in \myref{lst:grandExample} we therefore have the following 8
1998candidates for the \ExtractAndMoveMethod refactoring: $\{((16), \texttt{b.a},
1999f(1)), ((17), \texttt{b.a}, f(1)), ((16,17), \texttt{b.a}, f(2)), ((14,21),
2000\texttt{b.a}, f(2)), \\ ((5,21), \texttt{b.a}, f(2)), ((11,12), \texttt{a},
2001f(2)), ((11,21), \texttt{a}, f(3)), ((12,21), \texttt{b.a}, f(2))\}$.
2002
2003It now only remains to specify an order for these candidates, so we can choose
2004the most suitable one to refactor.
9503a520 2005
01d46361
EK
2006
2007\section{Choosing the selection}\label{sec:choosingSelection}
9503a520
EK
2008When choosing a selection between the text selections that have possible move
2009targets, the selections need to be ordered. The criteria below are presented in
2010the order they are prioritized. If not one selection is favored over the other
2011for a concrete criterion, the selections are evaluated by the next criterion.
2012
2013\begin{enumerate}
2014 \item The first criterion that is evaluated is whether a selection contains
01d46361
EK
2015 any unfixes or not. If selection \m{A} contains no unfixes, while selection
2016 \m{B} does, selection \m{A} is favored over selection \m{B}. This is
2017 because, if we can, we want to avoid moving such as assignments and variable
2018 declarations. This is done under the assumption that, if possible, avoiding
2019 selections containing unfixes will make the code moved a little cleaner.
2020
2021 \item The second criterion that is evaluated is whether a selection contains
2022 multiple possible targets or not. If selection \m{A} has only one possible
2023 target, and selection \m{B} has multiple, selection \m{A} is favored. If
2024 both selections have multiple possible targets, they are considered equal
2025 with respect to this criterion. The rationale for this heuristic is that we
2026 would prefer not to introduce new couplings between classes when performing
2027 the \ExtractAndMoveMethod refactoring.
9503a520
EK
2028
2029 \item When evaluating this criterion, this is with the knowledge that
01d46361
EK
2030 selection \m{A} and \m{B} both have one possible target, or multiple
2031 possible targets. Then, if the move target candidate of selection \m{A} has
2032 a higher reference count than the target candidate of selection \m{B},
2033 selection \m{A} is favored. The reason for this is that we would like to
2034 move the selection that gets rid of the most references to another class.
9503a520
EK
2035
2036 \item The last criterion is that if the frequencies of the targets chosen for
2037 both selections are equal, the selection with the target that is part of the
2038 prefix with highest number of segments is favored. This is done to favor
2039 indirection.
2040
2041\end{enumerate}
2042
2043If none of the above mentioned criteria favors one selection over another, the
2044selections are considered to be equally good candidates for the
2045\ExtractAndMoveMethod refactoring.
0cc6a67d 2046
01d46361
EK
2047\section{Concluding the example}
2048For choosing one of the remaining selections, we need to order our candidates
2049after the criteria in the previous section. Below we have the candidates ordered
2050in descending order, with the ``best'' candidate first:
2051
2052\begin{multicols}{2}
2053\begin{enumerate}
2054 \item $((16,17), \texttt{b.a}, f(2))$
2055 \item $((11,12), \texttt{a}, f(2))$
2056 \item $((16), \texttt{b.a}, f(1))$
2057 \item $((17), \texttt{b.a}, f(1))$
2058
2059 % With unfixes:
2060 % Many possible targets
2061 \item $((11,21), \texttt{a}, f(3))$
2062 \item $((5,21), \texttt{b.a}, f(2))$
2063 \item $((12,21), \texttt{b.a}, f(2))$
2064 \item $((14,21), \texttt{b.a}, f(2))$
2065
2066\end{enumerate}
2067\end{multicols}
2068
2069\begin{comment}
20705 if (bool) {
20716 a.foo();
20727 a = new A();
20738 a.bar();
20749 }
207510
207611 a.foo();
207712 a.bar();
207813
207914 switch (val) {
208015 case 1:
208116 b.a.foo();
208217 b.a.bar();
208318 break;
208419 default:
208520 a.foo();
208621 }
2087\end{comment}
2088
9db7f4cf
EK
2089The candidates ordered 5-8 all have unfixes in them, therefore they are ordered
2090last. None of the candidates ordered 1-4 have multiple possible targets. The
2091only interesting discussion is now why $(16,17)$ was favored over $(11,12)$.
2092This is because the prefix \code{b.a} enclosing the move target of selection
2093$(16,17)$ has one more segment than the prefix \code{a} of $(11,12)$.
2094
2095The selection is now extracted into a new method \method{gen\_123} and then
2096moved to class \type{B}, since that is the type of the variable \var{b} that is
2097the target from the chosen refactoring candidate. The name of the method has a
2098randomly generated suffix. In the refactoring implementation, the extracted
2099methods follow the pattern \code{generated\_<long>}, where \code{<long>} is a
2100pseudo-random long value. This is shortened here to make the example readable.
e36eade0 2101The result after the refactoring is shown in \myref{lst:grandExampleResult}.
01d46361
EK
2102
2103\begin{listing}[htb]
2104 \begin{multicols}{2}
2105 \begin{minted}[linenos]{java}
2106class C {
9db7f4cf
EK
2107 A a; B b; boolean bool;
2108
2109 void method(int val) {
2110 if (bool) {
2111 a.foo();
2112 a = new A();
2113 a.bar();
2114 }
2115
2116 a.foo();
2117 a.bar();
2118
2119 switch (val) {
2120 case 1:
2121 b.gen_123(this);
2122 break;
2123 default:
2124 a.foo();
01d46361 2125 }
9db7f4cf 2126 }
01d46361
EK
2127}
2128\end{minted}
2129
2130\columnbreak
2131
2132 \begin{minted}[]{java}
2133public class B {
9db7f4cf 2134 A a;
01d46361 2135
9db7f4cf
EK
2136 public void gen_123(C c) {
2137 a.foo();
2138 a.bar();
2139 }
01d46361
EK
2140}
2141\end{minted}
2142
2143 \end{multicols}
9db7f4cf
EK
2144 \caption{The result after refactoring the class \type{C} of
2145 \myref{lst:grandExample} with the \ExtractAndMoveMethod refactoring with
2146 $((16,17), \texttt{b.a}, f(2))$ as input.}
01d46361
EK
2147\label{lst:grandExampleResult}
2148\end{listing}
2149
b851f7df
EK
2150\chapter{Refactorings in Eclipse JDT: Design and
2151Shortcomings}\label{ch:jdt_refactorings}
5837a41f
EK
2152
2153This chapter will deal with some of the design behind refactoring support in
b4d90424
EK
2154\name{Eclipse}, and the JDT in specific. After which it will follow a section
2155about shortcomings of the refactoring API in terms of composition of
2156refactorings.
055dca93 2157
b0e80574 2158\section{Design}
fe0a4c48 2159The refactoring world of \name{Eclipse} can in general be separated into two parts: The
b289552b 2160language independent part and the part written for a specific programming
07e173d4
EK
2161language -- the language that is the target of the supported refactorings.
2162\todo{What about the language specific part?}
f041551b
EK
2163
2164\subsection{The Language Toolkit}
1326eec6
EK
2165The Language Toolkit\footnote{The content of this section is a mixture of
2166 written material from
2167 \url{https://www.eclipse.org/articles/Article-LTK/ltk.html} and
2168 \url{http://www.eclipse.org/articles/article.php?file=Article-Unleashing-the-Power-of-Refactoring/index.html},
2169the LTK source code and my own memory.}, or LTK for short, is the framework that
fe0a4c48 2170is used to implement refactorings in \name{Eclipse}. It is language independent and
1326eec6
EK
2171provides the abstractions of a refactoring and the change it generates, in the
2172form of the classes \typewithref{org.eclipse.ltk.core.refactoring}{Refactoring}
2173and \typewithref{org.eclipse.ltk.core.refactoring}{Change}.
2174
2175There are also parts of the LTK that is concerned with user interaction, but
2176they will not be discussed here, since they are of little value to us and our
2177use of the framework. We are primarily interested in the parts that can be
2178automated.
f041551b
EK
2179
2180\subsubsection{The Refactoring Class}
2181The abstract class \type{Refactoring} is the core of the LTK framework. Every
2182refactoring that is going to be supported by the LTK have to end up creating an
2183instance of one of its subclasses. The main responsibilities of subclasses of
2184\type{Refactoring} is to implement template methods for condition checking
2185(\methodwithref{org.eclipse.ltk.core.refactoring.Refactoring}{checkInitialConditions}
2186and
2187\methodwithref{org.eclipse.ltk.core.refactoring.Refactoring}{checkFinalConditions}),
2188in addition to the
2189\methodwithref{org.eclipse.ltk.core.refactoring.Refactoring}{createChange}
07e173d4
EK
2190method that creates and returns an instance of the \type{Change} class.
2191
2192If the refactoring shall support that others participate in it when it is
2193executed, the refactoring has to be a processor-based
2194refactoring\typeref{org.eclipse.ltk.core.refactoring.participants.ProcessorBasedRefactoring}.
2195It then delegates to its given
2196\typewithref{org.eclipse.ltk.core.refactoring.participants}{RefactoringProcessor}
1326eec6
EK
2197for condition checking and change creation. Participating in a refactoring can
2198be useful in cases where the changes done to programming source code affects
2199other related resources in the workspace. This can be names or paths in
2200configuration files, or maybe one would like to perform additional logging of
2201changes done in the workspace.
f041551b
EK
2202
2203\subsubsection{The Change Class}
07e173d4
EK
2204This class is the base class for objects that is responsible for performing the
2205actual workspace transformations in a refactoring. The main responsibilities for
2206its subclasses is to implement the
2207\methodwithref{org.eclipse.ltk.core.refactoring.Change}{perform} and
2208\methodwithref{org.eclipse.ltk.core.refactoring.Change}{isValid} methods. The
2209\method{isValid} method verifies that the change object is valid and thus can be
2210executed by calling its \method{perform} method. The \method{perform} method
2211performs the desired change and returns an undo change that can be executed to
2212reverse the effect of the transformation done by its originating change object.
2213
61420ef7 2214\subsubsection{Executing a Refactoring}\label{executing_refactoring}
07e173d4
EK
2215The life cycle of a refactoring generally follows two steps after creation:
2216condition checking and change creation. By letting the refactoring object be
2217handled by a
2218\typewithref{org.eclipse.ltk.core.refactoring}{CheckConditionsOperation} that
2219in turn is handled by a
2220\typewithref{org.eclipse.ltk.core.refactoring}{CreateChangeOperation}, it is
2221assured that the change creation process is managed in a proper manner.
2222
2223The actual execution of a change object has to follow a detailed life cycle.
2224This life cycle is honored if the \type{CreateChangeOperation} is handled by a
2225\typewithref{org.eclipse.ltk.core.refactoring}{PerformChangeOperation}. If also
2226an undo manager\typeref{org.eclipse.ltk.core.refactoring.IUndoManager} is set
2227for the \type{PerformChangeOperation}, the undo change is added into the undo
2228history.
055dca93 2229
b0e80574 2230\section{Shortcomings}
80663734 2231This section is introduced naturally with a conclusion: The JDT refactoring
5837a41f
EK
2232implementation does not facilitate composition of refactorings.
2233\todo{refine}This section will try to explain why, and also identify other
2234shortcomings of both the usability and the readability of the JDT refactoring
2235source code.
80663734
EK
2236
2237I will begin at the end and work my way toward the composition part of this
2238section.
2239
5837a41f 2240\subsection{Absence of Generics in Eclipse Source Code}
80663734 2241This section is not only concerning the JDT refactoring API, but also large
fe0a4c48 2242quantities of the \name{Eclipse} source code. The code shows a striking absence of the
80663734 2243Java language feature of generics. It is hard to read a class' interface when
5837a41f
EK
2244methods return objects or takes parameters of raw types such as \type{List} or
2245\type{Map}. This sometimes results in having to read a lot of source code to
2246understand what is going on, instead of relying on the available interfaces. In
2247addition, it results in a lot of ugly code, making the use of typecasting more
2248of a rule than an exception.
2249
2250\subsection{Composite Refactorings Will Not Appear as Atomic Actions}
2251
2252\subsubsection{Missing Flexibility from JDT Refactorings}
2253The JDT refactorings are not made with composition of refactorings in mind. When
2254a JDT refactoring is executed, it assumes that all conditions for it to be
1326eec6 2255applied successfully can be found by reading source files that have been
5837a41f
EK
2256persisted to disk. They can only operate on the actual source material, and not
2257(in-memory) copies thereof. This constitutes a major disadvantage when trying to
1326eec6
EK
2258compose refactorings, since if an exception occurs in the middle of a sequence
2259of refactorings, it can leave the project in a state where the composite
2260refactoring was only partially executed. It makes it hard to discard the changes
5837a41f
EK
2261done without monitoring and consulting the undo manager, an approach that is not
2262bullet proof.
2263
2264\subsubsection{Broken Undo History}
2265When designing a composed refactoring that is to be performed as a sequence of
2266refactorings, you would like it to appear as a single change to the workspace.
2267This implies that you would also like to be able to undo all the changes done by
2268the refactoring in a single step. This is not the way it appears when a sequence
2269of JDT refactorings is executed. It leaves the undo history filled up with
2270individual undo actions corresponding to every single JDT refactoring in the
fe0a4c48 2271sequence. This problem is not trivial to handle in \name{Eclipse}
e123ab03 2272\see{hacking_undo_history}.
5837a41f 2273
80663734 2274
a7514fbd 2275
b0e80574
EK
2276\chapter{Composite Refactorings in Eclipse}
2277
2278\section{A Simple Ad Hoc Model}
fe0a4c48 2279As pointed out in \myref{ch:jdt_refactorings}, the \name{Eclipse} JDT refactoring model
8b6b22c8
EK
2280is not very well suited for making composite refactorings. Therefore a simple
2281model using changer objects (of type \type{RefaktorChanger}) is used as an
fe0a4c48 2282abstraction layer on top of the existing \name{Eclipse} refactorings, instead of
50954fde
EK
2283extending the \typewithref{org.eclipse.ltk.core.refactoring}{Refactoring} class.
2284
2285The use of an additional abstraction layer is a deliberate choice. It is due to
2286the problem of creating a composite
2287\typewithref{org.eclipse.ltk.core.refactoring}{Change} that can handle text
2288changes that interfere with each other. Thus, a \type{RefaktorChanger} may, or
2289may not, take advantage of one or more existing refactorings, but it is always
2290intended to make a change to the workspace.
2291
2292\subsection{A typical \type{RefaktorChanger}}
2293The typical refaktor changer class has two responsibilities, checking
2294preconditions and executing the requested changes. This is not too different
2295from the responsibilities of an LTK refactoring, with the distinction that a
2296refaktor changer also executes the change, while an LTK refactoring is only
2297responsible for creating the object that can later be used to do the job.
2298
2299Checking of preconditions is typically done by an
2300\typewithref{no.uio.ifi.refaktor.analyze.analyzers}{Analyzer}. If the
2301preconditions validate, the upcoming changes are executed by an
2302\typewithref{no.uio.ifi.refaktor.change.executors}{Executor}.
b0e80574
EK
2303
2304\section{The Extract and Move Method Refactoring}
61420ef7
EK
2305%The Extract and Move Method Refactoring is implemented mainly using these
2306%classes:
2307%\begin{itemize}
2308% \item \type{ExtractAndMoveMethodChanger}
2309% \item \type{ExtractAndMoveMethodPrefixesExtractor}
2310% \item \type{Prefix}
2311% \item \type{PrefixSet}
2312%\end{itemize}
2313
2314\subsection{The Building Blocks}
2315This is a composite refactoring, and hence is built up using several primitive
b5c7bb1b
EK
2316refactorings. These basic building blocks are, as its name implies, the
2317\ExtractMethod refactoring\citing{refactoring} and the \MoveMethod
fe0a4c48 2318refactoring\citing{refactoring}. In \name{Eclipse}, the implementations of these
b5c7bb1b 2319refactorings are found in the classes
61420ef7
EK
2320\typewithref{org.eclipse.jdt.internal.corext.refactoring.code}{ExtractMethodRefactoring}
2321and
2322\typewithref{org.eclipse.jdt.internal.corext.refactoring.structure}{MoveInstanceMethodProcessor},
2323where the last class is designed to be used together with the processor-based
2324\typewithref{org.eclipse.ltk.core.refactoring.participants}{MoveRefactoring}.
2325
2326\subsubsection{The ExtractMethodRefactoring Class}
2327This class is quite simple in its use. The only parameters it requires for
2328construction is a compilation
2329unit\typeref{org.eclipse.jdt.core.ICompilationUnit}, the offset into the source
2330code where the extraction shall start, and the length of the source to be
2331extracted. Then you have to set the method name for the new method together with
50954fde 2332its visibility and some not so interesting parameters.
61420ef7
EK
2333
2334\subsubsection{The MoveInstanceMethodProcessor Class}
fe0a4c48
EK
2335For the \refa{Move Method}, the processor requires a little more advanced input than
2336the class for the \refa{Extract Method}. For construction it requires a method
50954fde
EK
2337handle\typeref{org.eclipse.jdt.core.IMethod} for the method that is to be moved.
2338Then the target for the move have to be supplied as the variable binding from a
2339chosen variable declaration. In addition to this, one have to set some
2340parameters regarding setters/getters, as well as delegation.
61420ef7 2341
50954fde
EK
2342To make a working refactoring from the processor, one have to create a
2343\type{MoveRefactoring} with it.
b0e80574 2344
356782a0 2345\subsection{The ExtractAndMoveMethodChanger}
50954fde 2346
61420ef7 2347The \typewithref{no.uio.ifi.refaktor.changers}{ExtractAndMoveMethodChanger}
50954fde
EK
2348class is a subclass of the class
2349\typewithref{no.uio.ifi.refaktor.changers}{RefaktorChanger}. It is responsible
2350for analyzing and finding the best target for, and also executing, a composition
fe0a4c48 2351of the \refa{Extract Method} and \refa{Move Method} refactorings. This particular changer is
50954fde
EK
2352the one of my changers that is closest to being a true LTK refactoring. It can
2353be reworked to be one if the problems with overlapping changes are resolved. The
2354changer requires a text selection and the name of the new method, or else a
2355method name will be generated. The selection has to be of the type
2356\typewithref{no.uio.ifi.refaktor.utils}{CompilationUnitTextSelection}. This
2357class is a custom extension to
2358\typewithref{org.eclipse.jface.text}{TextSelection}, that in addition to the
2359basic offset, length and similar methods, also carry an instance of the
2360underlying compilation unit handle for the selection.
2361
ae138b9d
EK
2362\subsubsection{The
2363 \type{ExtractAndMoveMethodAnalyzer}}\label{extractAndMoveMethodAnalyzer}
50954fde
EK
2364The analysis and precondition checking is done by the
2365\typewithref{no.uio.ifi.refaktor.analyze.analyzers}{ExtractAnd\-MoveMethodAnalyzer}.
2366First is check whether the selection is a valid selection or not, with respect
2367to statement boundaries and that it actually contains any selections. Then it
2368checks the legality of both extracting the selection and also moving it to
95c0f364
EK
2369another class. This checking of is performed by a range of checkers
2370\see{checkers}. If the selection is approved as legal, it is analyzed to find
2371the presumably best target to move the extracted method to.
50954fde
EK
2372
2373For finding the best suitable target the analyzer is using a
2374\typewithref{no.uio.ifi.refaktor.analyze.collectors}{PrefixesCollector} that
ae138b9d
EK
2375collects all the possible candidate targets for the refactoring. All the
2376non-candidates is found by an
50954fde 2377\typewithref{no.uio.ifi.refaktor.analyze.collectors}{UnfixesCollector} that
b8fce5af 2378collects all the targets that will give some kind of error if used. (For
3ab3e132 2379details about the property collectors, see \myref{propertyCollectors}.) All
b8fce5af 2380prefixes (and unfixes) are represented by a
a6415293
EK
2381\typewithref{no.uio.ifi.refaktor.extractors}{Prefix}, and they are collected
2382into sets of prefixes. The safe prefixes is found by subtracting from the set of
b8fce5af 2383candidate prefixes the prefixes that is enclosing any of the unfixes. A prefix
a6415293 2384is enclosing an unfix if the unfix is in the set of its sub-prefixes. As an
01d46361
EK
2385example, \code{``a.b''} is enclosing \code{``a''}, as is \code{``a''}. The safe
2386prefixes is unified in a \type{PrefixSet}. If a prefix has only one occurrence,
2387and is a simple expression, it is considered unsuitable as a move target. This
2388occurs in statements such as \code{``a.foo()''}. For such statements it bares no
2389meaning to extract and move them. It only generates an extra method and the
2390calling of it.
50954fde 2391
fbaa89ce
EK
2392The most suitable target for the refactoring is found by finding the prefix with
2393the most occurrences. If two prefixes have the same occurrence count, but they
01d46361 2394differ in the number of segments, the one with most segments is chosen.
fbaa89ce 2395
c8088eec
EK
2396\subsubsection{The
2397 \type{ExtractAndMoveMethodExecutor}}\label{extractAndMoveMethodExecutor}
50954fde
EK
2398If the analysis finds a possible target for the composite refactoring, it is
2399executed by an
2400\typewithref{no.uio.ifi.refaktor.change.executors}{ExtractAndMoveMethodExecutor}.
2401It is composed of the two executors known as
2402\typewithref{no.uio.ifi.refaktor.change.executors}{ExtractMethodRefactoringExecutor}
2403and
2404\typewithref{no.uio.ifi.refaktor.change.executors}{MoveMethodRefactoringExecutor}.
2405The \type{ExtractAndMoveMethodExecutor} is responsible for gluing the two
3727b75b 2406together by feeding the \type{MoveMethod\-RefactoringExecutor} with the
222d172b 2407resources needed after executing the extract method refactoring.
50954fde
EK
2408
2409\subsubsection{The \type{ExtractMethodRefactoringExecutor}}
2410This executor is responsible for creating and executing an instance of the
2411\type{ExtractMethodRefactoring} class. It is also responsible for collecting
2412some post execution resources that can be used to find the method handle for the
2413extracted method, as well as information about its parameters, including the
2414variable they originated from.
2415
2416\subsubsection{The \type{MoveMethodRefactoringExecutor}}
2417This executor is responsible for creating and executing an instance of the
2418\type{MoveRefactoring}. The move refactoring is a processor-based refactoring,
fe0a4c48 2419and for the \refa{Move Method} refactoring it is the \type{MoveInstanceMethodProcessor}
50954fde
EK
2420that is used.
2421
2422The handle for the method to be moved is found on the basis of the information
fe0a4c48 2423gathered after the execution of the \refa{Extract Method} refactoring. The only
50954fde
EK
2424information the \type{ExtractMethodRefactoring} is sharing after its execution,
2425regarding find the method handle, is the textual representation of the new
2426method signature. Therefore it must be parsed, the strings for types of the
2427parameters must be found and translated to a form that can be used to look up
2428the method handle from its type handle. They have to be on the unresolved
2429form.\todo{Elaborate?} The name for the type is found from the original
2430selection, since an extracted method must end up in the same type as the
2431originating method.
2432
fe0a4c48 2433When analyzing a selection prior to performing the \refa{Extract Method} refactoring, a
50954fde
EK
2434target is chosen. It has to be a variable binding, so it is either a field or a
2435local variable/parameter. If the target is a field, it can be used with the
2436\type{MoveInstanceMethodProcessor} as it is, since the extracted method still is
2437in its scope. But if the target is local to the originating method, the target
2438that is to be used for the processor must be among its parameters. Thus the
2439target must be found among the extracted method's parameters. This is done by
2440finding the parameter information object that corresponds to the parameter that
2441was declared on basis of the original target's variable when the method was
2442extracted. (The extracted method must take one such parameter for each local
2443variable that is declared outside the selection that is extracted.) To match the
2444original target with the correct parameter information object, the key for the
2445information object is compared to the key from the original target's binding.
2446The source code must then be parsed to find the method declaration for the
2447extracted method. The new target must be found by searching through the
2448parameters of the declaration and choose the one that has the same type as the
2449old binding from the parameter information object, as well as the same name that
2450is provided by the parameter information object.
2451
2452
356782a0
EK
2453\subsection{The
2454SearchBasedExtractAndMoveMethodChanger}\label{searchBasedExtractAndMoveMethodChanger}
c8088eec
EK
2455The
2456\typewithref{no.uio.ifi.refaktor.change.changers}{SearchBasedExtractAndMoveMethodChanger}
2457is a changer whose purpose is to automatically analyze a method, and execute the
2458\ExtractAndMoveMethod refactoring on it if it is a suitable candidate for the
2459refactoring.
2460
2461First, the \typewithref{no.uio.ifi.refaktor.analyze.analyzers}{SearchBasedExtractAndMoveMethodAnalyzer} is used
2462to analyze the method. If the method is found to be a candidate, the result from
2463the analysis is fed to the \type{ExtractAndMoveMethodExecutor}, whose job is to
2464execute the refactoring \see{extractAndMoveMethodExecutor}.
2465
2466\subsubsection{The SearchBasedExtractAndMoveMethodAnalyzer}
2467This analyzer is responsible for analyzing all the possible text selections of a
2468method and then choose the best result out of the analysis results that is, by
2469the analyzer, considered to be the potential candidates for the Extract and Move
2470Method refactoring.
2471
2472Before the analyzer is able to work with the text selections of a method, it
2473needs to generate them. To do this, it parses the method to obtain a
2474\type{MethodDeclaration} for it \see{astEclipse}. Then there is a statement
2475lists creator that creates statements lists of the different groups of
2476statements in the body of the method declaration. A text selections generator
2477generates text selections of all the statement lists for the analyzer to work
2478with.
2479
2480\paragraph{The statement lists creator}
b851f7df
EK
2481is responsible for generating lists of statements for all the possible nesting
2482levels of statements in the method. The statement lists creator is implemented
2483as an AST visitor \see{astVisitor}. It generates lists of statements by visiting
2484all the blocks in the method declaration and stores their statements in a
2485collection of statement lists. In addition, it visits all of the other
2486statements that can have a statement as a child, such as the different control
2487structures and the labeled statement.
c8088eec
EK
2488
2489The switch statement is the only kind of statement that is not straight forward
2490to obtain the child statements from. It stores all of its children in a flat
2491list. Its switch case statements are included in this list. This means that
2492there are potential statement lists between all of these case statements. The
2493list of statements from a switch statement is therefore traversed, and the
2494statements between the case statements are grouped as separate lists.
2495
44005fe3
EK
2496The highlighted part of \myref{lst:grandExample} shows an example of how the
2497statement lists creator would separate a method body into lists of statements.
c8088eec
EK
2498
2499\paragraph{The text selections generator} generates text selections for each
b851f7df
EK
2500list of statements from the statement lists creator. The generator generates a
2501text selection for every sub-sequence of statements in a list. For a sequence of
2502statements, the first statement and the last statement span out a text
2503selection.
c8088eec
EK
2504
2505In practice, the text selections are calculated by only one traversal of the
2506statement list. There is a set of generated text selections. For each statement,
2507there is created a temporary set of selections, in addition to a text selection
2508based on the offset and length of the statement. This text selection is added to
2509the temporary set. Then the new selection is added with every selection from the
2510set of generated text selections. These new selections are added to the
2511temporary set. Then the temporary set of selections is added to the set of
2512generated text selections. The result of adding two text selections is a new
2513text selection spanned out by the two addends.
2514
53be7239 2515\begin{comment}
0fa64de5
EK
2516\begin{listing}[h]
2517\def\charwidth{5.7pt}
2518\def\indent{4*\charwidth}
2519\def\lineheight{\baselineskip}
2520\def\mintedtop{\lineheight}
2521
2522\begin{tikzpicture}[overlay, yscale=-1]
2523 \tikzstyle{overlaybox}=[fill=lightgray,opacity=0.2]
2524
2525 \draw[overlaybox] (2*\charwidth,\mintedtop) rectangle
2526 +(18*\charwidth,\lineheight);
2527
2528 \draw[overlaybox] (2*\charwidth,\mintedtop+\lineheight) rectangle
2529 +(18*\charwidth,\lineheight);
2530
2531 \draw[overlaybox] (2*\charwidth,\mintedtop+3*\lineheight) rectangle
2532 +(18*\charwidth,\lineheight);
2533
2534 \draw[overlaybox] (\indent-3*\charwidth,\mintedtop) rectangle
2535 +(20*\charwidth,2*\lineheight);
2536
2537 \draw[overlaybox] (3*\charwidth,\mintedtop+\lineheight) rectangle
2538 +(16*\charwidth,3*\lineheight);
2539
2540 \draw[overlaybox] (\indent,\mintedtop) rectangle
2541 +(14*\charwidth,4*\lineheight);
2542\end{tikzpicture}
2543\begin{minted}{java}
2544 statement one;
2545 statement two;
2546 ...
2547 statement k;
2548\end{minted}
2549\caption{Example of how the text selections generator would generate text
2550 selections based on a lists of statements. Each highlighted rectangle
2551represents a text selection.}
2552\label{lst:textSelectionsExample}
2553\end{listing}
021508ad
EK
2554\todoin{fix \myref{lst:textSelectionsExample}? Text only? All
2555sub-sequences\ldots}
53be7239 2556\end{comment}
0fa64de5 2557
ae138b9d
EK
2558\paragraph{Finding the candidate} for the refactoring is done by analyzing all
2559the generated text selection with the \type{ExtractAndMoveMethodAnalyzer}
2560\see{extractAndMoveMethodAnalyzer}. If the analyzer generates a useful result,
2561an \type{ExtractAndMoveMethodCandidate} is created from it, that is kept in a
2562list of potential candidates. If no candidates are found, the
2563\type{NoTargetFoundException} is thrown.
2564
2565Since only one of the candidates can be chosen, the analyzer must sort out which
2566candidate to choose. The sorting is done by the static \method{sort} method of
2567\type{Collections}. The comparison in this sorting is done by an
2568\type{ExtractAndMoveMethodCandidateComparator}.
2569\todoin{Write about the
2570ExtractAndMoveMethodCandidateComparator/FavorNoUnfixesCandidateComparator}
2571
356782a0 2572
b0e80574 2573\subsection{The Prefix Class}
a6415293
EK
2574This class exists mainly for holding data about a prefix, such as the expression
2575that the prefix represents and the occurrence count of the prefix within a
2576selection. In addition to this, it has some functionality such as calculating
2577its sub-prefixes and intersecting it with another prefix. The definition of the
2578intersection between two prefixes is a prefix representing the longest common
2579expression between the two.
2580
b0e80574 2581\subsection{The PrefixSet Class}
a6415293
EK
2582A prefix set holds elements of type \type{Prefix}. It is implemented with the
2583help of a \typewithref{java.util}{HashMap} and contains some typical set
2584operations, but it does not implement the \typewithref{java.util}{Set}
2585interface, since the prefix set does not need all of the functionality a
2586\type{Set} requires to be implemented. In addition It needs some other
2587functionality not found in the \type{Set} interface. So due to the relatively
2588limited use of prefix sets, and that it almost always needs to be referenced as
2589such, and not a \type{Set<Prefix>}, it remains as an ad hoc solution to a
2590concrete problem.
2591
2592There are two ways adding prefixes to a \type{PrefixSet}. The first is through
2593its \method{add} method. This works like one would expect from a set. It adds
2594the prefix to the set if it does not already contain the prefix. The other way
2595is to \emph{register} the prefix with the set. When registering a prefix, if the
2596set does not contain the prefix, it is just added. If the set contains the
2597prefix, its count gets incremented. This is how the occurrence count is handled.
2598
2599The prefix set also computes the set of prefixes that is not enclosing any
2600prefixes of another set. This is kind of a set difference operation only for
2601enclosing prefixes.
b0e80574 2602
5837a41f
EK
2603\subsection{Hacking the Refactoring Undo
2604History}\label{hacking_undo_history}
a6415293 2605\todoin{Where to put this section?}
8fae7b44
EK
2606
2607As an attempt to make multiple subsequent changes to the workspace appear as a
2608single action (i.e. make the undo changes appear as such), I tried to alter
2609the undo changes\typeref{org.eclipse.ltk.core.refactoring.Change} in the history
2610of the refactorings.
2611
2612My first impulse was to remove the, in this case, last two undo changes from the
f041551b 2613undo manager\typeref{org.eclipse.ltk.core.refactoring.IUndoManager} for the
fe0a4c48 2614\name{Eclipse} refactorings, and then add them to a composite
8fae7b44
EK
2615change\typeref{org.eclipse.ltk.core.refactoring.CompositeChange} that could be
2616added back to the manager. The interface of the undo manager does not offer a
2617way to remove/pop the last added undo change, so a possible solution could be to
4cb06723
EK
2618decorate\citing{designPatterns} the undo manager, to intercept and collect the
2619undo changes before delegating to the \method{addUndo}
f041551b 2620method\methodref{org.eclipse.ltk.core.refactoring.IUndoManager}{addUndo} of the
8fae7b44
EK
2621manager. Instead of giving it the intended undo change, a null change could be
2622given to prevent it from making any changes if run. Then one could let the
2623collected undo changes form a composite change to be added to the manager.
2624
2625There is a technical challenge with this approach, and it relates to the undo
2626manager, and the concrete implementation
94fc7e71
EK
2627\typewithref{org.eclipse.ltk.internal.core.refactoring}{UndoManager2}. This
2628implementation is designed in a way that it is not possible to just add an undo
2629change, you have to do it in the context of an active
8fae7b44
EK
2630operation\typeref{org.eclipse.core.commands.operations.TriggeredOperations}.
2631One could imagine that it might be possible to trick the undo manager into
2632believing that you are doing a real change, by executing a refactoring that is
2633returning a kind of null change that is returning our composite change of undo
94fc7e71 2634refactorings when it is performed. But this is not the way to go.
8fae7b44
EK
2635
2636Apart from the technical problems with this solution, there is a functional
2637problem: If it all had worked out as planned, this would leave the undo history
2638in a dirty state, with multiple empty undo operations corresponding to each of
2639the sequentially executed refactoring operations, followed by a composite undo
2640change corresponding to an empty change of the workspace for rounding of our
2641composite refactoring. The solution to this particular problem could be to
2642intercept the registration of the intermediate changes in the undo manager, and
2643only register the last empty change.
2644
2645Unfortunately, not everything works as desired with this solution. The grouping
2646of the undo changes into the composite change does not make the undo operation
2647appear as an atomic operation. The undo operation is still split up into
2648separate undo actions, corresponding to the change done by its originating
2649refactoring. And in addition, the undo actions has to be performed separate in
2650all the editors involved. This makes it no solution at all, but a step toward
2651something worse.
2652
2653There might be a solution to this problem, but it remains to be found. The
2654design of the refactoring undo management is partly to be blamed for this, as it
2655it is to complex to be easily manipulated.
2656
b0e80574 2657
0d7fbd88 2658
2f2080fb 2659
03674629 2660\chapter{Analyzing Source Code in Eclipse}
5308274d 2661
356782a0 2662\section{The Java model}\label{javaModel}
fe0a4c48 2663The Java model of \name{Eclipse} is its internal representation of a Java project. It
5308274d 2664is light-weight, and has only limited possibilities for manipulating source
fe0a4c48 2665code. It is typically used as a basis for the Package Explorer in \name{Eclipse}.
5308274d
EK
2666
2667The elements of the Java model is only handles to the underlying elements. This
2668means that the underlying element of a handle does not need to actually exist.
2669Hence the user of a handle must always check that it exist by calling the
2670\method{exists} method of the handle.
2671
8647eef7
EK
2672The handles with descriptions is listed in \myref{tab:javaModel}, while the
2673hierarchy of the Java Model is shown in \myref{fig:javaModel}.
4e468834 2674
8647eef7
EK
2675\begin{table}[htb]
2676 \caption{The elements of the Java Model\citing{vogelEclipseJDT2012}.}
2677 \label{tab:javaModel}
4e468834 2678 \centering
4e468834 2679 % sum must equal number of columns (3)
8647eef7
EK
2680 \begin{tabularx}{\textwidth}{@{} L{0.7} L{1.1} L{1.2} @{}}
2681 \toprule
4e468834
EK
2682 \textbf{Project Element} & \textbf{Java Model element} &
2683 \textbf{Description} \\
8647eef7 2684 \midrule
4e468834 2685 Java project & \type{IJavaProject} & The Java project which contains all other objects. \\
8647eef7 2686 \midrule
4e468834
EK
2687 Source folder /\linebreak[2] binary folder /\linebreak[3] external library &
2688 \type{IPackageFragmentRoot} & Hold source or binary files, can be a folder
2689 or a library (zip / jar file). \\
8647eef7 2690 \midrule
4e468834
EK
2691 Each package & \type{IPackageFragment} & Each package is below the
2692 \type{IPackageFragmentRoot}, sub-packages are not leaves of the package,
2693 they are listed directed under \type{IPackageFragmentRoot}. \\
8647eef7 2694 \midrule
4e468834
EK
2695 Java Source file & \type{ICompilationUnit} & The Source file is always below
2696 the package node. \\
8647eef7
EK
2697 \midrule
2698 Types / Fields /\linebreak[3] Methods & \type{IType} / \type{IField}
2699 /\linebreak[3] \type{IMethod} & Types, fields and methods. \\
2700 \bottomrule
4e468834 2701 \end{tabularx}
4e468834
EK
2702\end{table}
2703
4e468834 2704
5308274d
EK
2705\begin{figure}[h]
2706 \centering
2707 \begin{tikzpicture}[%
2708 grow via three points={one child at (0,-0.7) and
2709 two children at (0,-0.7) and (0,-1.4)},
2710 edge from parent path={(\tikzparentnode.south west)+(0.5,0) |-
2711 (\tikzchildnode.west)}]
2712 \tikzstyle{every node}=[draw=black,thick,anchor=west]
2713 \tikzstyle{selected}=[draw=red,fill=red!30]
2714 \tikzstyle{optional}=[dashed,fill=gray!50]
2715 \node {\type{IJavaProject}}
2716 child { node {\type{IPackageFragmentRoot}}
2717 child { node {\type{IPackageFragment}}
2718 child { node {\type{ICompilationUnit}}
2719 child { node {\type{IType}}
2720 child { node {\type{\{ IType \}*}}
2721 child { node {\type{\ldots}}}
2722 }
2723 child [missing] {}
2724 child { node {\type{\{ IField \}*}}}
2725 child { node {\type{IMethod}}
2726 child { node {\type{\{ IType \}*}}
2727 child { node {\type{\ldots}}}
2728 }
2729 }
2730 child [missing] {}
2731 child [missing] {}
2732 child { node {\type{\{ IMethod \}*}}}
2733 }
2734 child [missing] {}
2735 child [missing] {}
2736 child [missing] {}
2737 child [missing] {}
2738 child [missing] {}
2739 child [missing] {}
2740 child [missing] {}
2741 child { node {\type{\{ IType \}*}}}
2742 }
2743 child [missing] {}
2744 child [missing] {}
2745 child [missing] {}
2746 child [missing] {}
2747 child [missing] {}
2748 child [missing] {}
2749 child [missing] {}
2750 child [missing] {}
2751 child [missing] {}
2752 child { node {\type{\{ ICompilationUnit \}*}}}
2753 }
2754 child [missing] {}
2755 child [missing] {}
2756 child [missing] {}
2757 child [missing] {}
2758 child [missing] {}
2759 child [missing] {}
2760 child [missing] {}
2761 child [missing] {}
2762 child [missing] {}
2763 child [missing] {}
2764 child [missing] {}
2765 child { node {\type{\{ IPackageFragment \}*}}}
2766 }
2767 child [missing] {}
2768 child [missing] {}
2769 child [missing] {}
2770 child [missing] {}
2771 child [missing] {}
2772 child [missing] {}
2773 child [missing] {}
2774 child [missing] {}
2775 child [missing] {}
2776 child [missing] {}
2777 child [missing] {}
2778 child [missing] {}
2779 child [missing] {}
2780 child { node {\type{\{ IPackageFragmentRoot \}*}}}
2781 ;
2782 \end{tikzpicture}
fe0a4c48 2783 \caption{The Java model of \name{Eclipse}. ``\type{\{ SomeElement \}*}'' means
8647eef7 2784 ``\type{SomeElement} zero or more times``. For recursive structures,
5308274d
EK
2785 ``\type{\ldots}'' is used.}
2786 \label{fig:javaModel}
2787\end{figure}
2788
3ab3e132 2789\section{The Abstract Syntax Tree}
fe0a4c48 2790\name{Eclipse} is following the common paradigm of using an abstract syntax tree for
03674629
EK
2791source code analysis and manipulation.
2792
03674629
EK
2793When parsing program source code into something that can be used as a foundation
2794for analysis, the start of the process follows the same steps as in a compiler.
3ab3e132 2795This is all natural, because the way a compiler analyzes code is no different
03674629
EK
2796from how source manipulation programs would do it, except for some properties of
2797code that is analyzed in the parser, and that they may be differing in what
4e468834 2798kinds of properties they analyze. Thus the process of translation source code
03674629 2799into a structure that is suitable for analyzing, can be seen as a kind of
65e213db
EK
2800interrupted compilation process \see{fig:interruptedCompilationProcess}.
2801
2802\begin{figure}[h]
2803 \centering
2804 \tikzset{
c876d1a4 2805 base/.style={anchor=north, align=center, rectangle, minimum height=1.4cm},
65e213db 2806 basewithshadow/.style={base, drop shadow, fill=white},
c876d1a4
EK
2807 outlined/.style={basewithshadow, draw, rounded corners, minimum
2808 width=0.4cm},
2809 primary/.style={outlined, font=\bfseries},
65e213db 2810 dashedbox/.style={outlined, dashed},
62563950
EK
2811 arrowpath/.style={black, align=center, font=\small},
2812 processarrow/.style={arrowpath, ->, >=angle 90, shorten >=1pt},
65e213db 2813 }
62563950 2814 \begin{tikzpicture}[node distance=1.3cm and 3cm, scale=1, every
c876d1a4 2815 node/.style={transform shape}]
62563950
EK
2816 \node[base](AuxNode1){\small source code};
2817 \node[primary, right=of AuxNode1, xshift=-2.5cm](Scanner){Scanner};
c876d1a4 2818 \node[primary, right=of Scanner, xshift=0.5cm](Parser){Parser};
72e039dc
EK
2819 \node[dashedbox, below=of Parser](SemanticAnalyzer){Semantic\\Analyzer};
2820 \node[dashedbox, left=of SemanticAnalyzer](SourceCodeOptimizer){Source
2821 Code\\Optimizer};
2822 \node[dashedbox, below=of SourceCodeOptimizer
c876d1a4 2823 ](CodeGenerator){Code\\Generator};
72e039dc
EK
2824 \node[dashedbox, right=of CodeGenerator](TargetCodeOptimizer){Target
2825 Code\\Optimizer};
2826 \node[base, right=of TargetCodeOptimizer](AuxNode2){};
c876d1a4 2827
62563950
EK
2828 \draw[processarrow](AuxNode1) -- (Scanner);
2829
2830 \path[arrowpath] (Scanner) -- node [sloped](tokens){tokens}(Parser);
2831 \draw[processarrow](Scanner) -- (tokens) -- (Parser);
2832
2833 \path[arrowpath] (Parser) -- node (syntax){syntax
2834 tree}(SemanticAnalyzer);
2835 \draw[processarrow](Parser) -- (syntax) -- (SemanticAnalyzer);
2836
2837 \path[arrowpath] (SemanticAnalyzer) -- node
2838 [sloped](annotated){annotated\\tree}(SourceCodeOptimizer);
2839 \draw[processarrow, dashed](SemanticAnalyzer) -- (annotated) --
2840 (SourceCodeOptimizer);
2841
2842 \path[arrowpath] (SourceCodeOptimizer) -- node
2843 (intermediate){intermediate code}(CodeGenerator);
2844 \draw[processarrow, dashed](SourceCodeOptimizer) -- (intermediate) --
2845 (CodeGenerator);
2846
2847 \path[arrowpath] (CodeGenerator) -- node [sloped](target1){target
c876d1a4 2848 code}(TargetCodeOptimizer);
62563950
EK
2849 \draw[processarrow, dashed](CodeGenerator) -- (target1) --
2850 (TargetCodeOptimizer);
2851
2852 \path[arrowpath](TargetCodeOptimizer) -- node [sloped](target2){target
c876d1a4 2853 code}(AuxNode2);
62563950 2854 \draw[processarrow, dashed](TargetCodeOptimizer) -- (target2) (AuxNode2);
65e213db 2855 \end{tikzpicture}
72e039dc 2856 \caption{Interrupted compilation process. {\footnotesize (Full compilation
52da2102
EK
2857 process borrowed from \emph{Compiler construction: principles and practice}
2858 by Kenneth C. Louden\citing{louden1997}.)}}
65e213db
EK
2859 \label{fig:interruptedCompilationProcess}
2860\end{figure}
2861
03674629
EK
2862The process starts with a \emph{scanner}, or lexer. The job of the scanner is to
2863read the source code and divide it into tokens for the parser. Therefore, it is
2864also sometimes called a tokenizer. A token is a logical unit, defined in the
2865language specification, consisting of one or more consecutive characters. In
3ab3e132 2866the Java language the tokens can for instance be the \var{this} keyword, a curly
03674629 2867bracket \var{\{} or a \var{nameToken}. It is recognized by the scanner on the
3ab3e132 2868basis of something equivalent of a regular expression. This part of the process
03674629
EK
2869is often implemented with the use of a finite automata. In fact, it is common to
2870specify the tokens in regular expressions, that in turn is translated into a
2871finite automata lexer. This process can be automated.
2872
3ab3e132 2873The program component used to translate a stream of tokens into something
03674629
EK
2874meaningful, is called a parser. A parser is fed tokens from the scanner and
2875performs an analysis of the structure of a program. It verifies that the syntax
2876is correct according to the grammar rules of a language, that is usually
2877specified in a context-free grammar, and often in a variant of the
fe0a4c48 2878\name{Backus--Naur
03674629
EK
2879Form}\footnote{\url{https://en.wikipedia.org/wiki/Backus-Naur\_Form}}. The
2880result coming from the parser is in the form of an \emph{Abstract Syntax Tree},
2881AST for short. It is called \emph{abstract}, because the structure does not
2882contain all of the tokens produced by the scanner. It only contain logical
2883constructs, and because it forms a tree, all kinds of parentheses and brackets
2884are implicit in the structure. It is this AST that is used when performing the
2885semantic analysis of the code.
2886
2887As an example we can think of the expression \code{(5 + 7) * 2}. The root of
fe0a4c48 2888this tree would in \name{Eclipse} be an \type{InfixExpression} with the operator
d11bcf4d
EK
2889\var{TIMES}, and a left operand that is also an \type{InfixExpression} with the
2890operator \var{PLUS}. The left operand \type{InfixExpression}, has in turn a left
2891operand of type \type{NumberLiteral} with the value \var{``5''} and a right
2892operand \type{NumberLiteral} with the value \var{``7''}. The root will have a
2893right operand of type \type{NumberLiteral} and value \var{``2''}. The AST for
2894this expression is illustrated in \myref{fig:astInfixExpression}.
2895
3ab3e132
EK
2896Contrary to the Java Model, an abstract syntax tree is a heavy-weight
2897representation of source code. It contains information about properties like
2898type bindings for variables and variable bindings for names.
4e468834
EK
2899
2900
d11bcf4d
EK
2901\begin{figure}[h]
2902 \centering
a1d68d95 2903 \begin{tikzpicture}[scale=0.8]
894dce0d 2904 \tikzset{level distance=40pt}
a1d68d95
EK
2905 \tikzset{sibling distance=5pt}
2906 \tikzstyle{thescale}=[scale=0.8]
2907 \tikzset{every tree node/.style={align=center}}
d11bcf4d 2908 \tikzset{edge from parent/.append style={thick}}
a1d68d95
EK
2909 \tikzstyle{inode}=[rectangle,rounded corners,draw,fill=lightgray,drop
2910 shadow,align=center]
2911 \tikzset{every internal node/.style={inode}}
894dce0d 2912 \tikzset{every leaf node/.style={draw=none,fill=none}}
d11bcf4d 2913
894dce0d
EK
2914 \Tree [.\type{InfixExpression} [.\type{InfixExpression}
2915 [.\type{NumberLiteral} \var{``5''} ] [.\type{Operator} \var{PLUS} ]
2916 [.\type{NumberLiteral} \var{``7''} ] ]
d11bcf4d
EK
2917 [.\type{Operator} \var{TIMES} ]
2918 [.\type{NumberLiteral} \var{``2''} ]
2919 ]
2920 \end{tikzpicture}
894dce0d 2921 \caption{The abstract syntax tree for the expression \code{(5 + 7) * 2}.}
d11bcf4d
EK
2922 \label{fig:astInfixExpression}
2923\end{figure}
03674629 2924
c8088eec 2925\subsection{The AST in Eclipse}\label{astEclipse}
fe0a4c48 2926In \name{Eclipse}, every node in the AST is a child of the abstract superclass
03674629
EK
2927\typewithref{org.eclipse.jdt.core.dom}{ASTNode}. Every \type{ASTNode}, among a
2928lot of other things, provides information about its position and length in the
2929source code, as well as a reference to its parent and to the root of the tree.
2930
2931The root of the AST is always of type \type{CompilationUnit}. It is not the same
2932as an instance of an \type{ICompilationUnit}, which is the compilation unit
894dce0d 2933handle of the Java model. The children of a \type{CompilationUnit} is an
03674629
EK
2934optional \type{PackageDeclaration}, zero or more nodes of type
2935\type{ImportDecaration} and all its top-level type declarations that has node
2936types \type{AbstractTypeDeclaration}.
2937
2938An \type{AbstractType\-Declaration} can be one of the types
2939\type{AnnotationType\-Declaration}, \type{Enum\-Declaration} or
2940\type{Type\-Declaration}. The children of an \type{AbstractType\-Declaration}
2941must be a subtype of a \type{BodyDeclaration}. These subtypes are:
2942\type{AnnotationTypeMember\-Declaration}, \type{EnumConstant\-Declaration},
2943\type{Field\-Declaration}, \type{Initializer} and \type{Method\-Declaration}.
2944
2945Of the body declarations, the \type{Method\-Declaration} is the most interesting
2946one. Its children include lists of modifiers, type parameters, parameters and
2947exceptions. It has a return type node and a body node. The body, if present, is
2948of type \type{Block}. A \type{Block} is itself a \type{Statement}, and its
2949children is a list of \type{Statement} nodes.
2950
2951There are too many types of the abstract type \type{Statement} to list up, but
2952there exists a subtype of \type{Statement} for every statement type of Java, as
2953one would expect. This also applies to the abstract type \type{Expression}.
2954However, the expression \type{Name} is a little special, since it is both used
2955as an operand in compound expressions, as well as for names in type declarations
2956and such.
2957
fe0a4c48 2958There is an overview of some of the structure of an \name{Eclipse} AST in
94deee9e
EK
2959\myref{fig:astEclipse}.
2960
e8173df5
EK
2961\begin{figure}[h]
2962 \centering
5e5908eb 2963 \begin{tikzpicture}[scale=0.8]
0f918507
EK
2964 \tikzset{level distance=50pt}
2965 \tikzset{sibling distance=5pt}
5e5908eb 2966 \tikzstyle{thescale}=[scale=0.8]
e8173df5 2967 \tikzset{every tree node/.style={align=center}}
5e5908eb
EK
2968 \tikzset{edge from parent/.append style={thick}}
2969 \tikzstyle{inode}=[rectangle,rounded corners,draw,fill=lightgray,drop
2970 shadow,align=center]
2971 \tikzset{every internal node/.style={inode}}
e8173df5
EK
2972 \tikzset{every leaf node/.style={draw=none,fill=none}}
2973
e601ce99
EK
2974 \Tree [.\type{CompilationUnit} [.\type{[ PackageDeclaration ]} [.\type{Name} ]
2975 [.\type{\{ Annotation \}*} ] ]
2976 [.\type{\{ ImportDeclaration \}*} [.\type{Name} ] ]
0f918507 2977 [.\type{\{ AbstractTypeDeclaration \}+} [.\node(site){\type{\{
e601ce99 2978 BodyDeclaration \}*}}; ] [.\type{SimpleName} ] ]
e8173df5 2979 ]
e601ce99 2980 \begin{scope}[shift={(0.5,-6)}]
5e5908eb 2981 \node[inode,thescale](root){\type{MethodDeclaration}};
e601ce99 2982 \node[inode,thescale](modifiers) at (4.5,-5){\type{\{ IExtendedModifier \}*}
5e5908eb 2983 \\ {\footnotesize (Of type \type{Modifier} or \type{Annotation})}};
e601ce99 2984 \node[inode,thescale](typeParameters) at (-6,-3.5){\type{\{ TypeParameter
5e5908eb 2985 \}*}};
fbeec228 2986 \node[inode,thescale](parameters) at (-5,-5){\type{\{
5e5908eb 2987 SingleVariableDeclaration \}*} \\ {\footnotesize (Parameters)}};
e601ce99 2988 \node[inode,thescale](exceptions) at (5,-3){\type{\{ Name \}*} \\
5e5908eb 2989 {\footnotesize (Exceptions)}};
e601ce99 2990 \node[inode,thescale](return) at (-6.5,-2){\type{Type} \\ {\footnotesize
5e5908eb 2991 (Return type)}};
e601ce99
EK
2992 \begin{scope}[shift={(0,-5)}]
2993 \Tree [.\node(body){\type{[ Block ]} \\ {\footnotesize (Body)}};
2994 [.\type{\{ Statement \}*} [.\type{\{ Expression \}*} ]
2995 [.\type{\{ Statement \}*} [.\type{\ldots} ]]
2996 ]
2997 ]
2998 \end{scope}
0f918507 2999 \end{scope}
e601ce99
EK
3000 \draw[->,>=triangle 90,shorten >=1pt](root.east)..controls +(east:2) and
3001 +(south:1)..(site.south);
0f918507 3002
5e5908eb
EK
3003 \draw (root.south) -- (modifiers);
3004 \draw (root.south) -- (typeParameters);
3005 \draw (root.south) -- ($ (parameters.north) + (2,0) $);
3006 \draw (root.south) -- (exceptions);
3007 \draw (root.south) -- (return);
3008 \draw (root.south) -- (body);
3009
e8173df5 3010 \end{tikzpicture}
fe0a4c48 3011 \caption{The format of the abstract syntax tree in \name{Eclipse}.}
e8173df5
EK
3012 \label{fig:astEclipse}
3013\end{figure}
94deee9e 3014\todoin{Add more to the AST format tree? \myref{fig:astEclipse}}
a2868580 3015
b8fce5af 3016\section{The ASTVisitor}\label{astVisitor}
3ab3e132
EK
3017So far, the only thing that has been addressed is how the data that is going to
3018be the basis for our analysis is structured. Another aspect of it is how we are
3019going to traverse the AST to gather the information we need, so we can conclude
3020about the properties we are analysing. It is of course possible to start at the
3021top of the tree, and manually search through its nodes for the ones we are
3022looking for, but that is a bit inconvenient. To be able to efficiently utilize
3023such an approach, we would need to make our own framework for traversing the
3024tree and visiting only the types of nodes we are after. Luckily, this
fe0a4c48 3025functionality is already provided in \name{Eclipse}, by its
50976f51
EK
3026\typewithref{org.eclipse.jdt.core.dom}{ASTVisitor}.
3027
fe0a4c48
EK
3028The \name{Eclipse} AST, together with its \type{ASTVisitor}, follows the
3029\pattern{Visitor} pattern\citing{designPatterns}. The intent of this design
3030pattern is to facilitate extending the functionality of classes without touching
3031the classes themselves.
0a8ca90c 3032
fe0a4c48
EK
3033Let us say that there is a class hierarchy of elements. These elements all have
3034a method \method{accept(Visitor visitor)}. In its simplest form, the
0a8ca90c
EK
3035\method{accept} method just calls the \method{visit} method of the visitor with
3036itself as an argument, like this: \code{visitor.visit(this)}. For the visitors
3037to be able to extend the functionality of all the classes in the elements
3038hierarchy, each \type{Visitor} must have one visit method for each concrete
3039class in the hierarchy. Say the hierarchy consists of the concrete classes
3040\type{ConcreteElementA} and \type{ConcreteElementB}. Then each visitor must have
3041the (possibly empty) methods \method{visit(ConcreteElementA element)} and
3042\method{visit(ConcreteElementB element)}. This scenario is depicted in
3043\myref{fig:visitorPattern}.
50976f51 3044
3572a8ac
EK
3045\begin{figure}[h]
3046 \centering
3047 \tikzstyle{abstract}=[rectangle, draw=black, fill=white, drop shadow, text
3048 centered, anchor=north, text=black, text width=6cm, every one node
3049part/.style={align=center, font=\bfseries\itshape}]
3050 \tikzstyle{concrete}=[rectangle, draw=black, fill=white, drop shadow, text
3051 centered, anchor=north, text=black, text width=6cm]
3052 \tikzstyle{inheritarrow}=[->, >=open triangle 90, thick]
3053 \tikzstyle{commentarrow}=[->, >=angle 90, dashed]
3054 \tikzstyle{line}=[-, thick]
3055 \tikzset{every one node part/.style={align=center, font=\bfseries}}
3056 \tikzset{every second node part/.style={align=center, font=\ttfamily}}
3572a8ac
EK
3057
3058 \begin{tikzpicture}[node distance=1cm, scale=0.8, every node/.style={transform
3059 shape}]
3060 \node (Element) [abstract, rectangle split, rectangle split parts=2]
3061 {
3062 \nodepart{one}{Element}
3063 \nodepart{second}{+accept(visitor: Visitor)}
3064 };
3065 \node (AuxNode01) [text width=0, minimum height=2cm, below=of Element] {};
3066 \node (ConcreteElementA) [concrete, rectangle split, rectangle split
3067 parts=2, left=of AuxNode01]
3068 {
3069 \nodepart{one}{ConcreteElementA}
3070 \nodepart{second}{+accept(visitor: Visitor)}
3071 };
3072 \node (ConcreteElementB) [concrete, rectangle split, rectangle split
3073 parts=2, right=of AuxNode01]
3074 {
3075 \nodepart{one}{ConcreteElementB}
3076 \nodepart{second}{+accept(visitor: Visitor)}
3077 };
3078
3079 \node[comment, below=of ConcreteElementA] (CommentA) {visitor.visit(this)};
3080
3081 \node[comment, below=of ConcreteElementB] (CommentB) {visitor.visit(this)};
3082
3083 \node (AuxNodeX) [text width=0, minimum height=1cm, below=of AuxNode01] {};
3084
3085 \node (Visitor) [abstract, rectangle split, rectangle split parts=2,
3086 below=of AuxNodeX]
3087 {
3088 \nodepart{one}{Visitor}
3089 \nodepart{second}{+visit(ConcreteElementA)\\+visit(ConcreteElementB)}
3090 };
3091 \node (AuxNode02) [text width=0, minimum height=2cm, below=of Visitor] {};
3092 \node (ConcreteVisitor1) [concrete, rectangle split, rectangle split
3093 parts=2, left=of AuxNode02]
3094 {
3095 \nodepart{one}{ConcreteVisitor1}
3096 \nodepart{second}{+visit(ConcreteElementA)\\+visit(ConcreteElementB)}
3097 };
3098 \node (ConcreteVisitor2) [concrete, rectangle split, rectangle split
3099 parts=2, right=of AuxNode02]
3100 {
3101 \nodepart{one}{ConcreteVisitor2}
3102 \nodepart{second}{+visit(ConcreteElementA)\\+visit(ConcreteElementB)}
3103 };
3104
3105
3106 \draw[inheritarrow] (ConcreteElementA.north) -- ++(0,0.7) -|
3107 (Element.south);
3108 \draw[line] (ConcreteElementA.north) -- ++(0,0.7) -|
3109 (ConcreteElementB.north);
3110
3111 \draw[inheritarrow] (ConcreteVisitor1.north) -- ++(0,0.7) -|
3112 (Visitor.south);
3113 \draw[line] (ConcreteVisitor1.north) -- ++(0,0.7) -|
3114 (ConcreteVisitor2.north);
3115
3116 \draw[commentarrow] (CommentA.north) -- (ConcreteElementA.south);
3117 \draw[commentarrow] (CommentB.north) -- (ConcreteElementB.south);
3118
3119
3120 \end{tikzpicture}
3121 \caption{The Visitor Pattern.}
3122 \label{fig:visitorPattern}
3123\end{figure}
3124
0a8ca90c
EK
3125The use of the visitor pattern can be appropriate when the hierarchy of elements
3126is mostly stable, but the family of operations over its elements is constantly
fe0a4c48 3127growing. This is clearly the case for the \name{Eclipse} AST, since the hierarchy of
0a8ca90c
EK
3128type \type{ASTNode} is very stable, but the functionality of its elements is
3129extended every time someone needs to operate on the AST. Another aspect of the
fe0a4c48 3130\name{Eclipse} implementation is that it is a public API, and the visitor pattern is an
0a8ca90c
EK
3131easy way to provide access to the nodes in the tree.
3132
fe0a4c48 3133The version of the visitor pattern implemented for the AST nodes in \name{Eclipse} also
0a8ca90c
EK
3134provides an elegant way to traverse the tree. It does so by following the
3135convention that every node in the tree first let the visitor visit itself,
b3adff95
EK
3136before it also makes all its children accept the visitor. The children are only
3137visited if the visit method of their parent returns \var{true}. This pattern
3138then makes for a prefix traversal of the AST. If postfix traversal is desired,
3139the visitors also has \method{endVisit} methods for each node type, that is
3140called after the \method{visit} method for a node. In addition to these visit
3141methods, there are also the methods \method{preVisit(ASTNode)},
3142\method{postVisit(ASTNode)} and \method{preVisit2(ASTNode)}. The
3143\method{preVisit} method is called before the type-specific \method{visit}
3144method. The \method{postVisit} method is called after the type-specific
3145\method{endVisit}. The type specific \method{visit} is only called if
3146\method{preVisit2} returns \var{true}. Overriding the \method{preVisit2} is also
3147altering the behavior of \method{preVisit}, since the default implementation is
94c59647
EK
3148responsible for calling it.
3149
3150An example of a trivial \type{ASTVisitor} is shown in
3151\myref{lst:astVisitorExample}.
3152
3153\begin{listing}
3154\begin{minted}{java}
3155public class CollectNamesVisitor extends ASTVisitor {
3156 Collection<Name> names = new LinkedList<Name>();
3157
3158 @Override
3159 public boolean visit(QualifiedName node) {
3160 names.add(node);
3161 return false;
3162 }
3163
3164 @Override
3165 public boolean visit(SimpleName node) {
3166 names.add(node);
3167 return true;
3168 }
3169}
3170\end{minted}
3171\caption{An \type{ASTVisitor} that visits all the names in a subtree and adds
3172them to a collection, except those names that are children of any
3173\type{QualifiedName}.}
3174\label{lst:astVisitorExample}
3175\end{listing}
3176
b8fce5af
EK
3177\section{Property collectors}\label{propertyCollectors}
3178The prefixes and unfixes are found by property
3179collectors\typeref{no.uio.ifi.refaktor.extractors.collectors.PropertyCollector}.
3180A property collector is of the \type{ASTVisitor} type, and thus visits nodes of
3181type \type{ASTNode} of the abstract syntax tree \see{astVisitor}.
3182
3183\subsection{The PrefixesCollector}
3184The \typewithref{no.uio.ifi.refaktor.extractors.collectors}{PrefixesCollector}
ccd252c5 3185finds prefixes that makes up the basis for calculating move targets for the
fe0a4c48 3186\refa{Extract and Move Method} refactoring. It visits expression
b8fce5af
EK
3187statements\typeref{org.eclipse.jdt.core.dom.ExpressionStatement} and creates
3188prefixes from its expressions in the case of method invocations. The prefixes
3189found is registered with a prefix set, together with all its sub-prefixes.
3190
3191\subsection{The UnfixesCollector}\label{unfixes}
3192The \typewithref{no.uio.ifi.refaktor.extractors.collectors}{UnfixesCollector}
47c0bea8 3193finds unfixes within a selection.
5230243c 3194\todoin{Give more technical detail?}
b8fce5af 3195
b8fce5af 3196
0a8ca90c 3197
5195bf0c 3198\subsection{The ContainsReturnStatementCollector}
21506154 3199\todoin{Remove section?}
5195bf0c
EK
3200The
3201\typewithref{no.uio.ifi.refaktor.analyze.collectors}{ContainsReturnStatementCollector}
3202is a very simple property collector. It only visits the return statements within
3203a selection, and can report whether it encountered a return statement or not.
3204
b8d069e4
EK
3205\subsection{The LastStatementCollector}
3206The \typewithref{no.uio.ifi.refaktor.analyze.collectors}{LastStatementCollector}
3207collects the last statement of a selection. It does so by only visiting the top
3208level statements of the selection, and compares the textual end offset of each
3ab3e132 3209encountered statement with the end offset of the previous statement found.
b8d069e4 3210
95c0f364 3211\section{Checkers}\label{checkers}
d6f8e65a
EK
3212The checkers are a range of classes that checks that text selections complies
3213with certain criteria. All checkers operates under the assumption that the code
3214they check is free from compilation errors. If a
95c0f364
EK
3215\typewithref{no.uio.ifi.refaktor.analyze.analyzers}{Checker} fails, it throws a
3216\type{CheckerException}. The checkers are managed by the
3217\type{LegalStatementsChecker}, which does not, in fact, implement the
3218\type{Checker} interface. It does, however, run all the checkers registered with
3219it, and reports that all statements are considered legal if no
08cbba3b 3220\type{CheckerException} is thrown. Many of the checkers either extends the
f72f72f1 3221\type{PropertyCollector} or utilizes one or more property collectors to verify
3ab3e132 3222some criteria. The checkers registered with the \type{LegalStatementsChecker}
f72f72f1 3223are described next. They are run in the order presented below.
95c0f364 3224
a22915d0 3225\subsection{The CallToProtectedOrPackagePrivateMethodChecker}
21506154
EK
3226This checker is used to check that at selection does not contain a call to a
3227method that is protected or package-private. Such a method either has the access
3228modifier \code{protected} or it has no access modifier.
a22915d0 3229
21506154
EK
3230The workings of the \type{CallToProtectedOrPackagePrivateMethod\-Checker} is
3231very simple. It looks for calls to methods that are either protected or
3232package-private within the selection, and throws an
3233\type{IllegalExpressionFoundException} if one is found.
a22915d0 3234
1fceb439
EK
3235\subsection{The DoubleClassInstanceCreationChecker}
3236The \type{DoubleClassInstanceCreationChecker} checks that there are no double
3237class instance creations where the inner constructor call take and argument that
3238is built up using field references.
3239
3240The checker visits all nodes of type \type{ClassInstanceCreation} within a
3241selection. For all of these nodes, if its parent also is a class instance
3242creation, it accepts a visitor that throws a
e36eade0 3243\type{IllegalExpressionFoundException} if it encounters a name that is a field
1fceb439
EK
3244reference.
3245
2a4b8dea 3246\subsection{The InstantiationOfNonStaticInnerClassChecker}
c6102ec2
EK
3247The \type{InstantiationOfNonStaticInnerClassChecker} checks that selections
3248does not contain instantiations of non-static inner classes. The
58467004 3249\type{MoveInstanceMethodProcessor} in \name{Eclipse} does not handle such
c6102ec2
EK
3250instantiations gracefully when moving a method. This problem is also related to
3251bug\ldots \todoin{File Eclipse bug report}
2a4b8dea 3252
8d0caf4c 3253\subsection{The EnclosingInstanceReferenceChecker}
98b06ec2
EK
3254The purpose of this checker is to verify that the names in a text selection are
3255not referencing any enclosing instances. In theory, the underlying problem could
3256be solved in some situations, but our dependency on the
8d0caf4c
EK
3257\type{MoveInstanceMethodProcessor} prevents this.
3258
3259The
3260\typewithref{no.uio.ifi.refaktor.analyze.analyzers}{EnclosingInstanceReferenceChecker}
3261is a modified version of the
801ff00a 3262\typewithref{org.eclipse.jdt.internal.corext.refactoring.structure.MoveInstanceMethod\-Processor}{EnclosingInstanceReferenceFinder}
8d0caf4c 3263from the \type{MoveInstanceMethodProcessor}. Wherever the
98b06ec2
EK
3264\type{EnclosingInstanceReferenceFinder} would create a fatal error status, the
3265checker will throw a \type{CheckerException}.
3266
3267The checker works by first finding all of the enclosing types of a selection.
3268Thereafter, it visits all the simple names of the selection to check that they
3269are not references to variables or methods declared in any of the enclosing
3270types. In addition, the checker visits \var{this}-expressions to verify that no
3271such expressions are qualified with any name.
8d0caf4c 3272
9cc2cd59 3273\subsection{The ReturnStatementsChecker}\label{returnStatementsChecker}
d59e3ab7
EK
3274The checker for return statements is meant to verify that a text selection is
3275consistent regarding return statements.
3276
3277If the selection is free from return statements, then the checker validates. So
3278this is the first thing the checker investigates.
d6f8e65a 3279
e36eade0 3280If the checker proceeds any further, it is because the selection contains one
d6f8e65a 3281or more return statements. The next test is therefore to check if the last
d59e3ab7
EK
3282statement of the selection ends in either a return or a throw statement. The
3283responsibility for checking that the last statement of the selection eventually
3284ends in a return or throw statement, is put on the
801ff00a 3285\type{LastStatementOfSelectionEndsInReturnOrThrowChecker}. For every node
d59e3ab7 3286visited, if the node is a statement, it does a test to see if the statement is a
801ff00a
EK
3287return, a throw or if it is an implicit return statement. If this is the case,
3288no further checking is done. This checking is done in the \code{preVisit2}
3289method \see{astVisitor}. If the node is not of a type that is being handled by
d59e3ab7 3290its type-specific visit method, the checker performs a simple test. If the node
801ff00a
EK
3291being visited is not the last statement of its parent that is also enclosed by
3292the selection, an \type{IllegalStatementFoundException} is thrown. This ensures
3293that all statements are taken care of, one way or the other. It also ensures
3294that the checker is conservative in the way it checks for legality of the
3295selection.
3296
3297To examine if a statement is an implicit return statement, the checker first
3298finds the last statement declared in its enclosing method. If this statement is
3299the same as the one under investigation, it is considered an implicit return
3300statement. If the statements are not the same, the checker does a search to see
d59e3ab7 3301if the statement examined is also the last statement of the method that can be
801ff00a
EK
3302reached. This includes the last statement of a block statement, a labeled
3303statement, a synchronized statement or a try statement, that in turn is the last
d59e3ab7
EK
3304statement enclosed by one of the statement types listed. This search goes
3305through all the parents of a statement until a statement is found that is not
3306one of the mentioned acceptable parent statements. If the search ends in a
3307method declaration, then the statement is considered to be the last reachable
3308statement of the method, and thus it is an implicit return statement.
3309
3310There are two kinds of statements that are handled explicitly: If-statements and
3311try-statements. Block, labeled and do-statements are handled by fall-through to
3312the other two.
3313
3314If-statements are handled explicitly by overriding their type-specific visit
3315method. If the then-part does not contain any return or throw statements an
3316\type{IllegalStatementFoundException} is thrown. If it does contain a return or
801ff00a 3317throw, its else-part is checked. If the else-part is non-existent, or it does
d59e3ab7
EK
3318not contain any return or throw statements an exception is thrown. If no
3319exception is thrown while visiting the if-statement, its children are visited.
801ff00a 3320
d59e3ab7 3321A try-statement is checked very similar to an if-statement. Its body must
801ff00a 3322contain a return or throw. The same applies to its catch clauses and finally
d59e3ab7 3323body. Failure to validate produces an \type{IllegalStatementFoundException}.
801ff00a
EK
3324
3325If the checker does not complain at any point, the selection is considered valid
3326with respect to return statements.
41cde50e
EK
3327
3328\subsection{The AmbiguousReturnValueChecker}
5230243c 3329This checker verifies that there are no ambiguous return values in a selection.
9cc2cd59 3330
5230243c 3331First, the checker needs to collect some data. Those data are the binding keys
9cc2cd59
EK
3332for all simple names that are assigned to within the selection, including
3333variable declarations, but excluding fields. The checker also collects whether
3334there exists a return statement in the selection or not. No further checks of
3335return statements are needed, since, at this point, the selection is already
3336checked for illegal return statements \see{returnStatementsChecker}.
3337
3338After the binding keys of the assignees are collected, the checker searches the
3339part of the enclosing method that is after the selection for references whose
3ab3e132
EK
3340binding keys are among the collected keys. If more than one unique referral is
3341found, or only one referral is found, but the selection also contains a return
3342statement, we have a situation with an ambiguous return value, and an exception
3343is thrown.
9cc2cd59
EK
3344
3345%\todoin{Explain why we do not need to consider variables assigned inside
3346%local/anonymous classes. (The referenced variables need to be final and so
3347%on\ldots)}
41cde50e
EK
3348
3349\subsection{The IllegalStatementsChecker}
3350This checker is designed to check for illegal statements.
3351
8fa89d14
EK
3352Notice that labels in break and continue statements needs some special
3353treatment. Since a label does not have any binding information, we have to
3354search upwards in the AST to find the \type{LabeledStatement} that corresponds
3355to the label from the break or continue statement, and check that it is
3356contained in the selection. If the break or continue statement does not have a
3357label attached to it, it is checked that its innermost enclosing loop or switch
3358statement (break statements only) also is contained in the selection.
08cbba3b 3359
8fa89d14 3360\todoin{Follow the development in the semantics section\ldots}
41cde50e 3361
58467004
EK
3362\chapter{Case Studies}
3363
3364In this chapter I am going to present a few case studies. This is done to give
e36eade0 3365an impression of how the search-based \ExtractAndMoveMethod refactoring
01d46361
EK
3366performs when giving it a larger project to take on. I will try to answer where
3367it lacks, in terms of completeness, as well as showing its effect on refactored
3368source code.
58467004
EK
3369
3370The first and primary case, is refactoring source code from the \name{Eclipse
3371JDT UI} project. The project is chosen because it is a real project, still in
3372development, with a large code base that is written by many different people
3373through several years. The code is installed in thousands of \name{Eclipse}
3374applications worldwide, and must be seen as a good representative for
3375professionally written Java source code. It is also the home for most of the JDT
3376refactoring code.
3377
3378For the second case, the \ExtractAndMoveMethod refactoring is fed the
3379\code{no.uio.ifi.refaktor} project. This is done as a variation of the
3380``dogfooding'' methodology, where you use your own tools to do your job, also
3381referred to as ``eating your own dog
3382food''\citing{harrisonDogfooding2006}.
3383
3384\section{The tools}
6fabec9b
EK
3385For conducting these experiments, three tools are used. Two of the ``tools''
3386both uses Eclipse as their platform. The first is our own tool,
58467004 3387written to be able to run the \ExtractAndMoveMethod refactoring as a batch
e36eade0 3388process, analyzing and refactoring many methods after each other. The second is
c9488804
EK
3389JUnit, that is used for running the projects own unit tests on the target code
3390both before and after it is refactored. The last tool that is used is a code
3391quality management tool, called \name{SonarQube}. It can be used to perform
3392different tasks for assuring code quality, but we are only going to take
3393advantage of one of its main features, namely Quality profiles.
58467004
EK
3394
3395A quality profile is used to define a set of coding rules that a project is
3396supposed to comply with. Failure to following these rules will be recorded as
3397so-called ``issues'', marked as having one of several degrees of severities,
3398ranging from ``info'' to ``blocker'', where the latter one is the most severe.
3399The measurements done for these case studies are therefore not presented as
3400fine-grained software metrics results, but rather as the number of issues for
6fabec9b
EK
3401each defined rule.
3402
bc7b5d67 3403In addition to the coding rules defined through quality profiles, \name{SonarQube}
6fabec9b
EK
3404calculates the complexity of source code. The metric that is used is cyclomatic
3405complexity, developed by Thomas J. McCabe in
34061976\citing{mccabeComplexity1976}. In this metric, functions have an initial
3407complexity of 1, and whenever the control flow of a function splits, the
3408complexity increases by
3409one\footnote{\url{http://docs.codehaus.org/display/SONAR/Metric+definitions}}.
bc7b5d67 3410\name{SonarQube} discriminates between functions and accessors. Accessors
6fabec9b
EK
3411are methods that are recognized as setters or getters. Accessors are not counted
3412in the complexity analysis.
3413
bc7b5d67
EK
3414\section{The \name{SonarQube} quality profile}
3415The quality profile that is used with \name{SonarQube} in these case studies has got
3416the name \name{IFI Refaktor Case Study} (version 6). The rules defined in the
3417profile are chosen because they are the available rules found in \name{SonarQube} that
3418measures complexity and coupling. Now follows a description of the rules in the
3419quality profile. The values that are set for these rules are listed in
6fabec9b
EK
3420\myref{tab:qualityProfile1}.
3421
3422\begin{description}
3423 \item[Avoid too complex class] is a rule that measures cyclomatic complexity
3424 for every statement in the body of a class, except for setters and getter.
bc7b5d67 3425 The threshold value set is its default value of 200.
6fabec9b
EK
3426
3427 \item[Classes should not be coupled to too many other classes ] is a rule that
3428 measures how many other classes a class depends upon. It does not count the
3429 dependencies of nested classes. It is meant to promote the Single
3430 Responsibility Principle. Although not explicitly stated, the rule's metric
3431 resembles the \metr{Coupling between object classes} (CBO) metric that is
3432 described by Chidamber and Kemerer in their article \tit{A Metrics Suite for
3433 Object Oriented Design}\citing{metricsSuite1994}. The max value for the rule
3434 is chosen on the background of an empirical study by Raed Shatnawi, that
3435 concludes that the number 9 is the most useful threshold for the CBO
98660ec0
EK
3436 metric\citing{shatnawiQuantitative2010}. This study is also performed on
3437 Eclipse source code, so this threshold value should be particularly well
3438 suited for the Eclipse JDT UI case in this chapter.
6fabec9b
EK
3439
3440 \item[Control flow statements \ldots{} should not be nested too deeply] is
3441 a rule that is meant to counter ``Spaghetti code''. It measures the nesting
3442 level of if, for, while, switch and try statements. The nesting levels start
bc7b5d67 3443 at 1. The max value set is its default value of 3.
6fabec9b
EK
3444
3445 \item[Methods should not be too complex] is a rule that measures cyclomatic
3446 complexity the same way as the ``Avoid too complex class'' rule. The max
3447 value used is 10, which ``seems like a reasonable, but not magical, upper
3448 limit``\citing{mccabeComplexity1976}.
3449
3450 \item[Methods should not have too many lines] is a rule that simply measures
bc7b5d67
EK
3451 the number of lines in methods. The threshold value of 20 is used for this
3452 metric. This is based on my own subjective opinions, as the default value of
3453 100 seems a bit too loose.
6fabec9b
EK
3454
3455 \item[NPath Complexity] is a rule that measures the number of possible
3456 execution paths through a function. The value used is the default value of
3457 200, that seems like a recognized threshold for this metric.
3458
3459 \item[Too many methods] is a rule that measures the number of methods in a
bc7b5d67 3460 class. The threshold value used is the default value of 10.
6fabec9b
EK
3461
3462\end{description}
3463
3464
3465\begin{table}[htb]
bc7b5d67 3466 \caption{The \name{IFI Refaktor Case Study} quality profile (version 6).}
6fabec9b
EK
3467 \label{tab:qualityProfile1}
3468 \centering
3469 \begin{tabularx}{\textwidth}{@{}>{\bfseries}L{1.5}R{0.5}@{}}
3470 \toprule
3471 \textbf{Rule} & \textbf{Max value} \\
3472 \midrule
3473 Avoid too complex class & 200 \\
3474 Classes should not be coupled to too many other classes (Single
3475 Responsibility Principle) & 9 \\
3476 Control flow statements \ldots{} should not be nested too deeply &
3477 3 \\
3478 Methods should not be too complex & 10 \\
3479 Methods should not have too many lines & 20 \\
3480 NPath Complexity & 200 \\
bc7b5d67 3481 Too many methods & 10 \\
6fabec9b
EK
3482
3483 \bottomrule
3484 \end{tabularx}
3485\end{table}
58467004 3486
bc7b5d67
EK
3487\section{The input}
3488A precondition for the source code that is going to be the target for a series
3489of \ExtractAndMoveMethod refactorings, is that it is organized as an Eclipse
3490project. It is also assumed that the code is free from compilation errors.
3491
3492\section{The experiment}
3493For a given project, the first job that is done, is to refactor its source code.
3494The refactoring batch job produces three things: The refactored project,
3495statistics gathered during the execution of the series of refactorings, and an
3496error log describing any errors happening during this execution. See
3497\myref{sec:benchmarking} for more information about how the refactorings are
3498performed.
3499
3500After the refactoring process is done, the before- and after-code is analyzed
3501with \name{SonarQube}. The analysis results are then stored in a database and
3502displayed through a \name{SonarQube} server with a web interface.\todoin{How
3503long are these results going to be publicly available?}
3504
3505The before- and after-code is also tested with their own unit tests. This is
3506done to discover any changes in the semantic behavior of the refactored code,
3507within the limits of these tests.
3508
58467004 3509\section{Case 1: The Eclipse JDT UI project}
98660ec0 3510This case is the ultimate test for our \ExtractAndMoveMethod refactoring. The
e36eade0 3511target source code is massive. With its over 300,000 lines of code and over
98660ec0 351225,000 methods, it is formidable task to perform automated changes on it. There
e36eade0 3513should be plenty of situations where things can go wrong, and, as we shall see
701c559a 3514later, they do.
98660ec0
EK
3515
3516I will start by presenting some statistics from the refactoring execution,
3517before I pick apart the \name{SonarQube} analysis and conclude by commenting on
701c559a
EK
3518the results from the unit tests. The configuration for the experiment is
3519specified in \myref{tab:configurationCase1}.
98660ec0 3520
701c559a
EK
3521\begin{table}[htb]
3522 \caption{Configuration for Case 1.}
3523 \label{tab:configurationCase1}
3524 \centering
3525 \begin{tabularx}{\textwidth}{@{}>{\bfseries}L{0.67}L{1.33}@{}}
3526 \toprule
3527 \spancols{2}{Benchmark data} \\
3528 \midrule
3529 Launch configuration & CaseStudy.launch \\
3530 Project & no.uio.ifi.refaktor.benchmark \\
3531 Repository & gitolite@git.uio.no:ifi-stolz-refaktor \\
3532 Commit & 43c16c04520746edd75f8dc2a1935781d3d9de6c \\
3533 \midrule
3534 \spancols{2}{Input data} \\
3535 \midrule
3536 Project & org.eclipse.jdt.ui \\
3537 Repository & git://git.eclipse.org/gitroot/jdt/eclipse.jdt.ui.git \\
3538 Commit & f218388fea6d4ec1da7ce22432726c244888bb6b \\
3539 Branch & R3\_8\_maintenance \\
3540 Tests suites & org.eclipse.jdt.ui.tests.AutomatedSuite,
3541 org.eclipse.jdt.ui.tests.refactoring.all.\-AllAllRefactoringTests \\
3542
3543 \bottomrule
3544 \end{tabularx}
3545\end{table}
98660ec0
EK
3546\subsection{Statistics}
3547The statistics gathered during the refactoring execution is presented in
3548\myref{tab:case1Statistics}.
3549
7fba220b
EK
3550\begin{table}[htb]
3551 \caption{Statistics after batch refactoring the Eclipse JDT UI project with
3552 the \ExtractAndMoveMethod refactoring.}
3553 \label{tab:case1Statistics}
3554 \centering
3555 \begin{tabularx}{\textwidth}{@{}>{\bfseries}L{1.5}R{0.5}@{}}
3556 \toprule
3557 \spancols{2}{Time used} \\
3558 \midrule
3559 Total time & 98m38s \\
3560 Analysis time & 14m41s (15\%) \\
3561 Change time & 74m20s (75\%) \\
3562 Miscellaneous tasks & 9m37s (10\%) \\
3563 \midrule
3564 \spancols{2}{Numbers of each type of entity analyzed} \\
3565 \midrule
3566 Packages & 110 \\
3567 Compilation units & 2,097 \\
3568 Types & 3,152 \\
3569 Methods & 27,667 \\
3570 Text selections & 591,500 \\
3571 \midrule
3572 \spancols{2}{Numbers for \ExtractAndMoveMethod refactoring candidates} \\
3573 \midrule
3574 Methods chosen as candidates & 2,552 \\
3575 Methods NOT chosen as candidates & 25,115 \\
3576 Candidate selections (multiple per method) & 36,843 \\
3577 \midrule
3578 \spancols{2}{\ExtractAndMoveMethod refactorings executed} \\
3579 \midrule
3580 Fully executed & 2,469 \\
3581 Not fully executed & 83 \\
3582 Total attempts & 2,552 \\
3583 \midrule
3584 \spancols{2}{Primitive refactorings executed} \\
3585 \spancols{2}{\small \ExtractMethod refactorings} \\
3586 \midrule
3587 Performed & 2,483 \\
3588 Not performed & 69 \\
3589 Total attempts & 2,552 \\
3590 \midrule
3591 \spancols{2}{\small \MoveMethod refactorings} \\
3592 \midrule
3593 Performed & 2469 \\
3594 Not performed & 14 \\
3595 Total attempts & 2,483 \\
3596
3597 \bottomrule
3598 \end{tabularx}
3599\end{table}
3600
98660ec0 3601\subsubsection{Execution time}
e36eade0 3602I consider the total execution time of approximately 1.5 hours as being
98660ec0
EK
3603acceptable. It clearly makes the batch process unsuitable for doing any
3604on-demand analysis or changes, but it is good enough for running periodic jobs,
3605like over-night analysis.
3606
3607As the statistics show, 75\% of the total time goes into making the actual code
3608changes. The time consumers are here the primitive \ExtractMethod and
3609\MoveMethod refactorings. Included in the change time is the parsing and
3610precondition checking done by the refactorings, as well as textual changes done
3611to files on disk. All this parsing and disk access is time-consuming, and
3612constitute a large part of the change time.
3613
3614In comparison, the pure analysis time, used to find suitable candidates, only
3615make up for 15\% of the total time consumed. This includes analyzing almost
3616600,000 text selections, while the number of attempted executions of the
3617\ExtractAndMoveMethod refactoring are only about 2,500. So the number of
3618executed primitive refactorings are approximately 5,000. Assuming the time used
3619on miscellaneous tasks are used mostly for parsing source code for the analysis,
3620we can say that the time used for analyzing code is at most 25\% of the total
3621time. This means that for every primitive refactoring executed, we can analyze
3622around 360 text selections. So, with an average of about 21 text selections per
3623method, it is reasonable to say that we can analyze over 15 methods in the time
3624it takes to perform a primitive refactoring.
3625
3626\subsubsection{Refactoring candidates}
3627Out of the 27,667 methods that was analyzed, 2,552 methods contained selections
3628that was considered candidates for the \ExtractAndMoveMethod refactoring. This
3629is roughly 9\% off the methods in the project. These 9\% of the methods had on
3630average 14.4 text selections that was considered considered possible refactoring
3631candidates.
3632
3633\subsubsection{Executed refactorings}
36342,469 out of 2,552 attempts on executing the \ExtractAndMoveMethod refactoring
3635was successful, giving a success rate of 96.7\%. The failure rate of 3.3\% stem
3636from situations where the analysis finds a candidate selection, but the change
3637execution fails. This failure could be an exception that was thrown, and the
3638refactoring aborts. It could also be the precondition checking for one of the
3639primitive refactorings that gives us an error status, meaning that if the
3640refactoring proceeds, the code will contain compilation errors afterwards,
3641forcing the composite refactoring to abort. This means that if the
3642\ExtractMethod refactoring fails, no attempt is done for the \MoveMethod
3643refactoring. \todo{Redundant information? Put in benchmark chapter?}
3644
3645Out of the 2,552 \ExtractMethod refactorings that was attempted executed, 69 of
3646them failed. This give a failure rate of 2.7\% for the primitive refactoring. In
3647comparison, the \MoveMethod refactoring had a failure rate of 0.6 \% of the
36482,483 attempts on the refactoring.
3649
3650\subsection{\name{SonarQube} analysis}
701c559a
EK
3651Results from the \name{SonarQube} analysis is shown in
3652\myref{tab:case1ResultsProfile1}.
98660ec0 3653
c9488804 3654\begin{table}[htb]
bc7b5d67
EK
3655 \caption{Results for analyzing the Eclipse JDT UI project, before and after
3656 the refactoring, with \name{SonarQube} and the \name{IFI Refaktor Case Study}
3657 quality profile. (Bold numbers are better.)}
c9488804
EK
3658 \label{tab:case1ResultsProfile1}
3659 \centering
c9488804
EK
3660 \begin{tabularx}{\textwidth}{@{}>{\bfseries}L{1.5}R{0.25}R{0.25}@{}}
3661 \toprule
bc7b5d67 3662 \textnormal{Number of issues for each rule} & Before & After \\
c9488804
EK
3663 \midrule
3664 Avoid too complex class & 81 & \textbf{79} \\
3665 Classes should not be coupled to too many other classes (Single
3666 Responsibility Principle) & \textbf{1,098} & 1,199 \\
3667 Control flow statements \ldots{} should not be nested too deeply & 1,375 &
3668 \textbf{1,285} \\
3669 Methods should not be too complex & 1,518 & \textbf{1,452} \\
3670 Methods should not have too many lines & 3,396 & \textbf{3,291} \\
3671 NPath Complexity & 348 & \textbf{329} \\
bc7b5d67 3672 Too many methods & \textbf{454} & 520 \\
6fabec9b 3673 \midrule
bc7b5d67 3674 Total number of issues & 8,270 & \textbf{8,155} \\
6fabec9b
EK
3675 \midrule
3676 \midrule
bc7b5d67 3677 \spancols{3}{Complexity} \\
6fabec9b
EK
3678 \midrule
3679 Per function & 3.6 & \textbf{3.3} \\
3680 Per class & \textbf{29.5} & 30.4 \\
3681 Per file & \textbf{44.0} & 45.3 \\
3682 \midrule
3683 Total complexity & \textbf{84,765} & 87,257 \\
3684 \midrule
3685 \midrule
bc7b5d67 3686 \spancols{3}{Numbers of each type of entity analyzed} \\
6fabec9b
EK
3687 \midrule
3688 Files & 1,926 & 1,926 \\
3689 Classes & 2,875 & 2,875 \\
3690 Functions & 23,744 & 26,332 \\
3691 Accessors & 1,296 & 1,019 \\
3692 Statements & 162,768 & 165,145 \\
3693 Lines of code & 320,941 & 329,112 \\
3694 \midrule
bc7b5d67 3695 Technical debt (in days) & \textbf{1,003.4} & 1,032.7 \\
c9488804
EK
3696 \bottomrule
3697 \end{tabularx}
3698\end{table}
6fabec9b 3699
701c559a 3700\subsubsection{Diversity in the number of entities analyzed}
e36eade0 3701The analysis performed by \name{SonarQube} is reporting fewer methods than found
701c559a
EK
3702by the pre-refactoring analysis. \name{SonarQube} discriminates between
3703functions (methods) and accessors, so the 1,296 accessors play a part in this
3704calculation. \name{SonarQube} also has the same definition as our plugin when
3705it comes to how a class is defined. Therefore is seems like \name{SonarQube}
3706misses 277 classes that our plugin handles. This can explain why the {SonarQube}
3707report differs from our numbers by approximately 2,500 methods,
3708
7fba220b
EK
3709\subsubsection{Complexity}
3710On all complexity rules that works on the method level, the number of issues
3711decreases with between 3.1\% and 6.5\% from before to after the refactoring. The
3712average complexity of a method decreases from 3.6 to 3.3, which is an
3713improvement of about 8.3\%. So, on the method level, the refactoring must be
3714said to have a slightly positive impact.
3715
3716The improvement in complexity on the method level is somewhat traded for
3717complexity on the class level. The complexity per class metric is worsen by 3\%
3718from before to after. The issues for the ``Too many methods'' rule also
3719increases by 14.5\%. These numbers indicate that the refactoring makes quite a
3720lot of the classes a little more complex overall. This is the expected outcome,
3721since the \ExtractAndMoveMethod refactoring introduces almost 2,500 new methods
3722into the project.
3723
3724The only number that can save the refactoring's impact on complexity on the
3725class level, is the ``Avoid too complex class'' rule. It improves with 2.5\%,
3726thus indicating that the complexity is moderately better distributed between the
3727classes after the refactoring than before.
3728
3729\subsubsection{Coupling}
3730One of the hopes when starting this project, was to be able to make a
3731refactoring that could lower the coupling between classes. Better complexity at
39884128 3732the method level is a not very unexpected byproduct of dividing methods into
7fba220b
EK
3733smaller parts. Lowering the coupling on the other hand, is a far greater task.
3734This is also reflected in the results for the only coupling rule defined in the
3735\name{SonarQube} quality profile, namely the ``Classes should not be coupled to
3736too many
3737other classes (Single Responsibility Principle)'' rule.
3738
3739The number of issues for the coupling rule is 1,098 before the refactoring, and
37401,199 afterwards. This is an increase in issues of 9.2\%, and a blow for this
3741project. These numbers can be interpreted two ways. The first possibility is
3742that our assumptions are wrong, and that increasing indirection does not
3743decrease coupling between classes. The other possibility is that our analysis
3744and choices of candidate text selections are not good enough. I vote for the
e36eade0 3745second possibility. (Voting against the public opinion may also be a little
7fba220b
EK
3746bold.)
3747
3748What probably happens is, that many of the times the \ExtractAndMoveMethod
3749refactoring is performed, the \MoveMethod refactoring ``drags'' with it
3750references to classes that are unknown to the method destination. If it happens
3751to be so lucky that it removes a dependency from one class, it might as well
3752introduce three new dependencies to another class. In those situations that a
3753class does not know about the originating class of a moved method, the
078b1e4a 3754\MoveMethod refactoring most certainly will introduce a dependency. This is
39884128 3755because there is a
078b1e4a 3756bug\footnote{\url{https://bugs.eclipse.org/bugs/show\_bug.cgi?id=228635}} in the
39884128
EK
3757refactoring, making it pass an instance of the originating class as a reference
3758to the moved method, regardless of whether the reference is used in the method
3759body or not.
7fba220b
EK
3760
3761There is also the possibility that the heuristics used to find candidate text
3762selections are not good enough, they most certainly are not. I wish I had more
3763time to fine-tune them, and to complete the analysis part of the project, but
3764this is simply not the case. This becomes even clearer when analyzing the unit
3765test results for the after-code.
3766
3767\subsubsection{Totals}
3768On the bright side, the total number of issues is lower after the refactoring
3769than it was before. Before the refactoring, the total number of issues is
37708,270, and after it is 8,155. An improvement of only 1.4\%.
3771
3772Then \name{SonarQube} tells me that the total complexity has increased by
37732.9\%, and that the (more questionable) ``technical debt'' has increased from
37741,003.4 to 1,032.7 days, also a deterioration of 2.9\%. Although these numbers
3775are similar, no correlation has been found between them.
3776
3777\subsection{Unit tests}
39884128
EK
3778The tests that have been run for the \name{Eclipse JDT UI} project, are the
3779tests in the test suites specified as the main test suites on the JDT UI wiki
3780page on how to contribute to the
3781project\footnote{\url{https://wiki.eclipse.org/JDT\_UI/How\_to\_Contribute\#Unit\_Testing}}.
3782
3783\subsubsection{Before the refactoring}
3784Running the tests for the before-code of Eclipse JDT UI yielded 4 errors and 3
3785failures for the \type{AutomatedSuite} test suite (2,007 test cases), and 2
3786errors and
37873 failures for the \type{AllAllRefactoringTests} test suite (3,816 test cases).
3788
3789\subsubsection{After the refactoring}
3790The test results for the after-code of the Eclipse JDT UI project is another
01d46361
EK
3791story. The reason for this is that during the setup for the unit tests, Eclipse
3792now reports that the project contains 322 fatal errors, and a lot of errors that
3793probably follows from these. This is another blow for this master's project.
39884128
EK
3794
3795It has now been shown that the \ExtractAndMoveMethod refactoring, in its current
3796state, produces code that does not compile. Had these errors originated from
3797only one bug, it would not have been much of a problem, but this is not the
e36eade0 3798case. By only looking at some random compilation problems in the refactored code,
01d46361
EK
3799I came up with at least four different bugs \todo{write bug reports?} that
3800caused those problems. I then stopped looking for more, since some of the bugs
3801would take more time to fix than I could justify using on them at this point.
39884128
EK
3802
3803The only thing that can be said in my defence, is that all the compilation
3804errors could have been avoided if the type of situations that causes them was
3805properly handled by the primitive refactorings, that again are supported by the
e36eade0 3806Eclipse JDT UI project. All of the four randomly found bugs that I mentioned
39884128
EK
3807before, are also weaknesses of the \MoveMethod refactoring. If the primitive
3808refactorings had detected the up-coming errors
3809in their precondition checking phase, the refactorings would have been aborted,
3810since this is how the \ExtractAndMoveMethod refactoring handles such situations.
3811
3812Of course, taking all possible situations into account is an immense task. This
3813is one of the reasons for the failure. A complete analysis is too big of a task
3814for this master's project to handle. Looking at it now, this comes as no
3815surprise, since the task is obviously also too big for the creators of the
3816primitive \MoveMethod refactoring. This shows that the underlying primitive
3817refactorings are not complete enough to be fully relied upon for avoiding
3818compilation errors.
3819
3820Considering all these problems, it is difficult to know how to interpret the
3821unit test results from after refactoring the Eclipse JDT UI. The
3822\type{AutomatedSuite} reported 565 errors and 5 failures. Three of the failures
3823were the same as reported before the refactoring took place, so two of them are
3824new. For these two cases it is not immediately apparent what makes them behave
3825differently. The program is so complex that to analyze it to find this out, we
3826might need more powerful methods than just manually analyzing its source code.
3827This is somewhat characteristic for imperative programming: The programs are
3828often hard to analyze and understand.
3829
3830For the \type{AllAllRefactoringTests} test suite, the three failures are gone,
3831but the two errors have grown to 2,257 errors. I will not try to analyze those
3832errors.
3833
3834What I can say, is that it is likely that the \ExtractAndMoveMethod refactoring
3835has introduced some unintended behavioral changes. Let us say that the
3836refactoring introduces at least two behavior-altering changes for every 2,500
3837executions. More than that is difficult to say about the behavior-preserving
3838properties of the \ExtractAndMoveMethod refactoring, at this point.
3839
3840\subsection{Conclusions}
3841After automatically analyzing and executing the \ExtractAndMoveMethod
3842refactoring for all the methods in the Eclipse JDT UI project, the results does
3843not look that promising. For this case, the refactoring seems almost unusable as
3844it is now. The error rate and measurements done tells us this.
3845
3846The refactoring makes the code a little less complex at the method level. But
3847this is merely a side effect of extracting methods, and holds little scientific
3848value. When it comes to the overall complexity, it is increased, although it is
3849slightly better spread among the classes.
3850
3851The analysis done before the \ExtractAndMoveMethod refactoring, is currently not
3852complete enough to make the refactoring useful. It introduces too many errors in
3853the code, and the code may change it's behavior. It also remains to prove that
3854large scale refactoring with it can decrease coupling between classes. A better
3855analysis may prove this, but in its present state, the opposite is the fact. The
3856coupling measurements done by \name{SonarQube} shows this.
3857
3858On the bright side, the performance of the refactoring process is not that bad.
3859It shows that it is possible to make a tool the way we do, if we can make the
3860tool do anything useful. As long as the analysis phase is not going to involve
3861anything that uses to much disk access, a lot of analysis can be done in a
3862reasonable amount of time.
3863
3864The time used on performing the actual changes excludes a trial and error
3865approach with the tools used in this master's project. In a trial and error
3866approach, you could for instance be using the primitive refactorings used in
3867this project to refactor code, and only then make decisions based on the effect,
3868possibly shown by traditional software metrics. The problem with the approach
3869taken in this project, compared to a trial and error approach, is that using
3870heuristics beforehand is much more complicated. But on the other hand, a trial
3871and error approach would still need to face the challenges of producing code
3872that does compile without errors. If using refactorings that could produce
3873in-memory changes, a trial and error approach could be made more efficient.
078b1e4a
EK
3874
3875\section{Case 2: The \type{no.uio.ifi.refaktor} project}
01d46361 3876In this case we will see a form of the ``dogfooding'' methodology used, when
e36eade0 3877refactoring our own \type{no.uio.ifi.refaktor} project with the
078b1e4a
EK
3878\ExtractAndMoveMethod refactoring.
3879
3880In this case I will try to point out some differences from case 1, and how they
3881impact the execution of the benchmark. The refaktor project is 39 times smaller
3882than the Eclipse JDT UI project, measured in lines of code. This will make
3883things a bit more transparent. It will therefore be interesting to see if this
3884case can shed light on any aspect of our project that was lost in the larger
3885case 1.
3886
eb913f75
EK
3887The configuration for the experiment is specified in
3888\myref{tab:configurationCase2}.
3889
3890\begin{table}[htb]
3891 \caption{Configuration for Case 2.}
3892 \label{tab:configurationCase2}
3893 \centering
3894 \begin{tabularx}{\textwidth}{@{}>{\bfseries}L{0.67}L{1.33}@{}}
3895 \toprule
3896 \spancols{2}{Benchmark data} \\
3897 \midrule
3898 Launch configuration & CaseStudyDogfooding.launch \\
3899 Project & no.uio.ifi.refaktor.benchmark \\
3900 Repository & gitolite@git.uio.no:ifi-stolz-refaktor \\
3901 Commit & 43c16c04520746edd75f8dc2a1935781d3d9de6c \\
3902 \midrule
3903 \spancols{2}{Input data} \\
3904 \midrule
3905 Project & no.uio.ifi.refaktor \\
3906 Repository & gitolite@git.uio.no:ifi-stolz-refaktor \\
3907 Commit & 43c16c04520746edd75f8dc2a1935781d3d9de6c \\
3908 Branch & master \\
e36eade0 3909 Test configuration & no.uio.ifi.refaktor.tests/ExtractTest.launch \\
eb913f75
EK
3910 \bottomrule
3911 \end{tabularx}
3912\end{table}
078b1e4a 3913
eb913f75
EK
3914\subsection{Statistics}
3915The statistics gathered during the refactoring execution is presented in
3916\myref{tab:case2Statistics}.
3917
3918\begin{table}[htb]
3919 \caption{Statistics after batch refactoring the \type{no.uio.ifi.refaktor}
3920project with the \ExtractAndMoveMethod refactoring.}
3921 \label{tab:case2Statistics}
3922 \centering
3923 \begin{tabularx}{\textwidth}{@{}>{\bfseries}L{1.5}R{0.5}@{}}
3924 \toprule
3925 \spancols{2}{Time used} \\
3926 \midrule
3927 Total time & 1m15s \\
3928 Analysis time & 0m18s (24\%) \\
3929 Change time & 0m47s (63\%) \\
3930 Miscellaneous tasks & 0m10s (14\%) \\
3931 \midrule
3932 \spancols{2}{Numbers of each type of entity analyzed} \\
3933 \midrule
3934 Packages & 33 \\
3935 Compilation units & 154 \\
3936 Types & 168 \\
3937 Methods & 1,070 \\
3938 Text selections & 8,609 \\
3939 \midrule
3940 \spancols{2}{Numbers for \ExtractAndMoveMethod refactoring candidates} \\
3941 \midrule
3942 Methods chosen as candidates & 58 \\
3943 Methods NOT chosen as candidates & 1,012 \\
3944 Candidate selections (multiple per method) & 227 \\
3945 \midrule
3946 \spancols{2}{\ExtractAndMoveMethod refactorings executed} \\
3947 \midrule
3948 Fully executed & 53 \\
3949 Not fully executed & 5 \\
3950 Total attempts & 58 \\
3951 \midrule
3952 \spancols{2}{Primitive refactorings executed} \\
3953 \spancols{2}{\small \ExtractMethod refactorings} \\
3954 \midrule
3955 Performed & 56 \\
3956 Not performed & 2 \\
3957 Total attempts & 58 \\
3958 \midrule
3959 \spancols{2}{\small \MoveMethod refactorings} \\
3960 \midrule
3961 Performed & 53 \\
3962 Not performed & 3 \\
3963 Total attempts & 56 \\
3964
3965 \bottomrule
3966 \end{tabularx}
3967\end{table}
3968
078b1e4a
EK
3969\subsubsection{Differences}
3970There are some differences between the two projects that make them a little
3971difficult to compare by performance.
3972
3973\paragraph{Different complexity.}
3974Although the JDT UI project is 39 times greater than the refaktor project in
3975terms of lines of code, it is only about 26 times its size measured in numbers
3976of methods. This means that the methods in the refaktor project are smaller in
3977average than in the JDT project. This is also reflected in the \name{SonarQube}
3978report, where the complexity per method for the JDT project is 3.6, while the
3979refaktor project has a complexity per method of 2.1.
3980
3981\paragraph{Number of selections per method.}
3982The analysis for the JDT project processed 21 text selections per method in
3983average. This number for the refaktor project is only 8 selections per method
3984analyzed. This is a direct consequence of smaller methods.
3985
3986\paragraph{Different candidates to methods ratio.}
3987The differences in how the projects are factored are also reflected in the
3988ratios for how many methods that are chosen as candidates compared to the total
3989number of methods analyzed. For the JDT project, 9\% of the methods was
3990considered to be candidates, while for the refaktor project, only 5\% of the
3991methods was chosen.
3992
3993\paragraph{The average number of possible candidate selection.}
e36eade0 3994For the methods that are chosen as candidates, the average number of possible
078b1e4a
EK
3995candidate selections for these methods differ quite much. For the JDT project,
3996the number of possible candidate selections for these methods were 14.44
3997selections per method, while the candidate methods in the refaktor project had
3998only 3.91 candidate selections to choose from, in average.
3999
eb913f75 4000\subsubsection{Execution time}
078b1e4a 4001The differences in complexity, and the different candidate methods to total
e36eade0 4002number of methods ratios, is shown in the distributions of the execution times.
078b1e4a
EK
4003For the JDT project, 75\% of the total time was used on the actual changes,
4004while for the refaktor project, this number was only 63\%.
eb913f75 4005
078b1e4a
EK
4006For the JDT project, the benchmark used on average 0.21 seconds per method in
4007the project, while for the refaktor project it used only 0.07 seconds per
4008method. So the process used 3 times as much time per method for the JDT project
4009than for the refaktor project.
4010
4011While the JDT project is 39 times larger than the refaktor project measured in
4012lines of code, the benchmark used about 79 times as long time on it than for the
4013refaktor project. Relatively, this is about twice as long.
4014
4015Since the details of these execution times are not that relevant to this
4016master's project, only their magnitude, I will leave them here.
eb913f75
EK
4017
4018\subsubsection{Executed refactorings}
078b1e4a
EK
4019For the composite \ExtractAndMoveMethod refactoring performed in case 2, 53
4020successful attempts out of 58 gives a success rate of 91.4\%. This is 5.3
e36eade0 4021percentage points worse than for case 1.
eb913f75
EK
4022
4023\subsection{\name{SonarQube} analysis}
4024Results from the \name{SonarQube} analysis is shown in
4025\myref{tab:case2ResultsProfile1}.
4026
078b1e4a
EK
4027Not much is to be said about these results. The trends in complexity and
4028coupling are the same. We end up a little worse after the refactoring process
4029than before.
4030
eb913f75
EK
4031\begin{table}[htb]
4032 \caption{Results for analyzing the \var{no.uio.ifi.refaktor} project, before
4033 and after the refactoring, with \name{SonarQube} and the \name{IFI Refaktor
4034 Case Study} quality profile. (Bold numbers are better.)}
4035 \label{tab:case2ResultsProfile1}
4036 \centering
4037 \begin{tabularx}{\textwidth}{@{}>{\bfseries}L{1.5}R{0.25}R{0.25}@{}}
4038 \toprule
4039 \textnormal{Number of issues for each rule} & Before & After \\
4040 \midrule
4041 Avoid too complex class & 1 & 1 \\
4042 Classes should not be coupled to too many other classes (Single
4043 Responsibility Principle) & \textbf{29} & 34 \\
4044 Control flow statements \ldots{} should not be nested too deeply & 24 &
4045 \textbf{21} \\
4046 Methods should not be too complex & 17 & \textbf{15} \\
4047 Methods should not have too many lines & 41 & \textbf{40} \\
4048 NPath Complexity & 3 & 3 \\
4049 Too many methods & \textbf{13} & 15 \\
4050 \midrule
4051 Total number of issues & \textbf{128} & 129 \\
4052 \midrule
4053 \midrule
4054 \spancols{3}{Complexity} \\
4055 \midrule
4056 Per function & 2.1 & 2.1 \\
4057 Per class & \textbf{12.5} & 12.9 \\
4058 Per file & \textbf{13.8} & 14.2 \\
4059 \midrule
4060 Total complexity & \textbf{2,089} & 2,148 \\
4061 \midrule
4062 \midrule
4063 \spancols{3}{Numbers of each type of entity analyzed} \\
4064 \midrule
4065 Files & 151 & 151 \\
4066 Classes & 167 & 167 \\
4067 Functions & 987 & 1,045 \\
4068 Accessors & 35 & 30 \\
4069 Statements & 3,355 & 3,416 \\
078b1e4a 4070 Lines of code & 8,238 & 8,460 \\
eb913f75
EK
4071 \midrule
4072 Technical debt (in days) & \textbf{19.0} & 20.7 \\
4073 \bottomrule
4074 \end{tabularx}
4075\end{table}
4076
078b1e4a
EK
4077\subsection{Unit tests}
4078The tests used for this case are the same that has been developed throughout the
4079master's project.
4080
4081The code that was refactored for this case suffered from some of the problems
4082discovered in case 1. This means that the after-code for case 2 also contained
4083compilation errors, but they were not as many. The code contained only 6 errors
4084that made the code not compile.
4085
4086All of the errors made, originated from the same bug. It is a bug that happens
4087in situation where a class instance creation is moved from between packages, and
4088the class for the instance is package-private. The \MoveMethod refactoring does
e36eade0 4089not detect that there will be a visibility problem, and neither does it promote
078b1e4a
EK
4090the package-private class to be public.
4091
4092Since the errors was easy to fix manually, I corrected them and ran the unit
4093tests as planned. Before the refactoring, all tests passed. All tests also
4094passed after the refactoring, with the six error corrections. Since the
4095corrections done is not of a kind that could make the behavior of the program
4096change, it is likely that the refactorings done to the
4097\type{no.uio.ifi.refaktor} project did not change its behavior. This is also
4098supported by the informal experiment presented next.
4099
4100\subsection{An informal experiment}
e36eade0 4101To complete the task of ``eating my own dog food'', I conducted an informal
078b1e4a
EK
4102experiment where I used the refactored version of the \type{no.uio.ifi.refaktor}
4103project, with the corrections, to again refaktor ``itself''.
4104
4105The experiment produced code containing the same six errors as after the
4106previous experiment. I also compared the after-code from the two experiments
4107with a diff-tool. The only differences found was different method names. This is
4108expected, since the method names are randomly generated by the
4109\ExtractAndMoveMethod refactoring.
eb913f75 4110
078b1e4a
EK
4111The outcome of this simple experiment makes me more confident that the
4112\ExtractAndMoveMethod refactoring made only behavior-preserving changes to the
4113\type{no.uio.ifi.refaktor} project, apart from the compilation errors.
eb913f75 4114
078b1e4a
EK
4115\subsection{Conclusions}
4116The differences in complexity between the Eclipse JDT UI project and the
4117\type{no.uio.ifi.refaktor} project, clearly influenced the differences in their
4118execution times. This is mostly because fewer of the methods were chosen to be
4119refactored for the refaktor project than for the JDT project. What this makes
e36eade0 4120difficult, is to know if there are any severe performance penalties associated
078b1e4a 4121with refactoring on a large project compared to a small one.
eb913f75 4122
078b1e4a
EK
4123The trends in the \name{SonarQube} analysis are the same for this case as for
4124the previous one. This gives more confidence in the these results.
eb913f75 4125
078b1e4a
EK
4126By refactoring our own code and using it again to refactor our code, we showed
4127that it is possible to write an automated composite refactoring that works for
4128many cases. That it probably did not alter the behavior of a smaller project
4129shows us nothing more than that though, and might just be a coincidence.
eb913f75 4130
078b1e4a 4131\section{Summary}
01d46361 4132\todoin{Write? Or wrap up in final conclusions?}
c9488804 4133
bc7b5d67 4134\chapter{Benchmarking}\label{sec:benchmarking}
60065669 4135This part of the master's project is located in the \name{Eclipse} project
356782a0
EK
4136\code{no.uio.ifi.refaktor.benchmark}. The purpose of it is to run the equivalent
4137of the \type{SearchBasedExtractAndMoveMethodChanger}
4138\see{searchBasedExtractAndMoveMethodChanger} over a larger software project,
3ab3e132 4139both to test its robustness but also its effect on different software metrics.
356782a0
EK
4140
4141\section{The benchmark setup}
fe0a4c48 4142The benchmark itself is set up as a \name{JUnit} test case. This is a convenient
7940aed1
EK
4143setup, and utilizes the \name{JUnit Plugin Test Launcher}. This provides us with
4144a fully functional \name{Eclipse} workbench. Most importantly, this gives us
4145access to the Java Model of \name{Eclipse} \see{javaModel}.
356782a0
EK
4146
4147\subsection{The ProjectImporter}
4148The Java project that is going to be used as the data for the benchmark, must be
4149imported into the JUnit workspace. This is done by the
4150\typewithref{no.uio.ifi.refaktor.benchmark}{ProjectImporter}. The importer
4151require the absolute path to the project description file. It is named
4152\code{.project} and is located at the root of the project directory.
4153
4154The project description is loaded to find the name of the project to be
4155imported. The project that shall be the destination for the import is created in
4156the workspace, on the base of the name from the description. Then an import
4157operation is created, based on both the source and destination information. The
4158import operation is run to perform the import.
4159
4160I have found no simple API call to accomplish what the importer does, which
fe0a4c48
EK
4161tells me that it may not be too many people performing this particular action.
4162The solution to the problem was found on \name{Stack
356782a0 4163Overflow}\footnote{\url{https://stackoverflow.com/questions/12401297}}. It
3ab3e132 4164contains enough dirty details to be considered inconvenient to use, if not
356782a0
EK
4165wrapping it in a class like my \type{ProjectImporter}. One would probably have
4166to delve into the source code for the import wizard to find out how the import
4167operation works, if no one had already done it.
4168
8fe94c0b
EK
4169\section{Statistics}
4170Statistics for the analysis and changes is captured by the
4171\typewithref{no.uio.ifi.refaktor.aspects}{StatisticsAspect}. This an
fe0a4c48 4172\emph{aspect} written in \name{AspectJ}.
8fe94c0b
EK
4173
4174\subsection{AspectJ}
fe0a4c48 4175\name{AspectJ}\footnote{\url{http://eclipse.org/aspectj/}} is an extension to
8fe94c0b
EK
4176the Java language, and facilitates combining aspect-oriented programming with
4177the object-oriented programming in Java.
4178
4179Aspect-oriented programming is a programming paradigm that is meant to isolate
4180so-called \emph{cross-cutting concerns} into their own modules. These
dd73ba39
EK
4181cross-cutting concerns are functionalities that spans over multiple classes, but
4182may not belong naturally in any of them. It can be functionality that does not
4183concern the business logic of an application, and thus may be a burden when
4184entangled with parts of the source code it does not really belong. Examples
4185include logging, debugging, optimization and security.
8fe94c0b 4186
416b6888
EK
4187Aspects are interacting with other modules by defining advices. The concept of
4188an \emph{advice} is known from both aspect-oriented and functional
4189programming\citing{wikiAdvice2014}. It is a function that modifies another
4190function when the latter is run. An advice in AspectJ is somewhat similar to a
4191method in Java. It is meant to alter the behavior of other methods, and contains
4192a body that is executed when it is applied.
8fe94c0b
EK
4193
4194An advice can be applied at a defined \emph{pointcut}. A pointcut picks out one
4195or more \emph{join points}. A join point is a well-defined point in the
4196execution of a program. It can occur when calling a method defined for a
4197particular class, when calling all methods with the same name,
4198accessing/assigning to a particular field of a given class and so on. An advice
dd73ba39
EK
4199can be declared to run both before, after returning from a pointcut, when there
4200is thrown an exception in the pointcut or after the pointcut either returns or
4201throws an exception. In addition to picking out join points, a pointcut can
4202also bind variables from its context, so they can be accessed in the body of an
4203advice. An example of a pointcut and an advice is found in
4204\myref{lst:aspectjExample}.
8fe94c0b
EK
4205
4206\begin{listing}[h]
c8088eec 4207\begin{minted}{aspectj}
8fe94c0b
EK
4208pointcut methodAnalyze(
4209 SearchBasedExtractAndMoveMethodAnalyzer analyzer) :
4210 call(* SearchBasedExtractAndMoveMethodAnalyzer.analyze())
4211 && target(analyzer);
4212
4213after(SearchBasedExtractAndMoveMethodAnalyzer analyzer) :
4214 methodAnalyze(analyzer) {
4215 statistics.methodCount++;
4216 debugPrintMethodAnalysisProgress(analyzer.method);
4217}
4218\end{minted}
4219\caption{An example of a pointcut named \method{methodAnalyze},
4220and an advice defined to be applied after it has occurred.}
4221\label{lst:aspectjExample}
4222\end{listing}
4223
dd73ba39
EK
4224\subsection{The Statistics class}
4225The statistics aspect stores statistical information in an object of type
4226\type{Statistics}. As of now, the aspect needs to be initialized at the point in
4227time where it is desired that it starts its data gathering. At any point in time
4228the statistics aspect can be queried for a snapshot of the current statistics.
4229
4230The \type{Statistics} class also include functionality for generating a report
4231of its gathered statistics. The report can be given either as a string or it can
4232be written to a file.
4233
4234\subsection{Advices}
4235The statistics aspect contains advices for gathering statistical data from
4236different parts of the benchmarking process. It captures statistics from both
4237the analysis part and the execution part of the composite \ExtractAndMoveMethod
4238refactoring.
4239
4240For the analysis part, there are advices to count the number of text selections
4241analyzed and the number of methods, types, compilation units and packages
4242analyzed. There are also advices that counts for how many of the methods there
4243is found a selection that is a candidate for the refactoring, and for how many
3ab3e132 4244methods there is not.
dd73ba39
EK
4245
4246There exists advices for counting both the successful and unsuccessful
4247executions of all the refactorings. Both for the \ExtractMethod and \MoveMethod
4248refactorings in isolation, as well as for the combination of them.
4249
8fe94c0b 4250\section{Optimizations}
41293210 4251When looking for optimizations to make for the benchmarking process, I used the
60065669
EK
4252\name{VisualVM}\footnote{\url{http://visualvm.java.net/}} \gloss{profiler} for
4253the Java Virtual Machine to both profile the application and also to make memory
4254dumps of its heap.
8fe94c0b
EK
4255
4256\subsection{Caching}
60065669
EK
4257When \gloss{profiling} the benchmark process before making any optimizations, it
4258early became apparent that the parsing of source code was a place to direct
4259attention towards. This discovery was done when only \emph{analyzing} source
4260code, before trying to do any \emph{manipulation} of it. Caching of the parsed
4261ASTs seemed like the best way to save some time, as expected. With only a simple
4262cache of the most recently used AST, the analysis time was speeded up by a
4263factor of around 20. This number depends a little upon which type of system the
4264analysis is run.
41293210
EK
4265
4266The caching is managed by a cache manager, that now, by default, utilizes the
4267not so well known feature of Java called a \emph{soft reference}. Soft
4268references are best explained in the context of weak references. A \emph{weak
4269reference} is a reference to an object instance that is only guaranteed to
4270persist as long as there is a \emph{strong reference} or a soft reference
4271referring the same object. If no such reference is found, its referred object is
4272garbage collected. A strong reference is basically the same as a regular Java
4273reference. A soft reference has the same guarantees as a week reference when it
4274comes to its relation to strong references, but it is not necessarily garbage
4275collected whenever there exists no strong references to it. A soft reference
4276\emph{may} reside in memory as long as the JVM has enough free memory in the
4277heap. A soft reference will therefore usually perform better than a weak
4278reference when used for simple caching and similar tasks. The way to use a
4279soft/weak reference is to as it for its referent. The return value then has to
4280be tested to check that it is not \var{null}. For the basic usage of soft
4281references, see \myref{lst:softReferenceExample}. For a more thorough
4282explanation of weak references in general, see\citing{weakRef2006}.
4283
4284\begin{listing}[h]
4285\begin{minted}{java}
4286// Strong reference
4287Object strongRef = new Object();
4288
4289// Soft reference
4290SoftReference<Object> softRef =
4291 new SoftReference<Object>(new Object());
4292
4293// Using the soft reference
4294Object obj = softRef.get();
4295if (obj != null) {
4296 // Use object here
4297}
4298\end{minted}
4299\caption{Showing the basic usage of soft references. Weak references is used the
4300 same way. {\footnotesize (The references are part of the \code{java.lang.ref}
4301package.)}}
4302\label{lst:softReferenceExample}
4303\end{listing}
4304
4305The cache based on soft references has no limit for how many ASTs it caches. It
4306is generally not advisable to keep references to ASTs for prolonged periods of
4307time, since they are expensive structures to hold on to. For regular plugin
fe0a4c48 4308development, \name{Eclipse} recommends not creating more than one AST at a time to
41293210
EK
4309limit memory consumption. Since the benchmarking has nothing to do with user
4310experience, and throughput is everything, these advices are intentionally
fe0a4c48 4311ignored. This means that during the benchmarking process, the target \name{Eclipse}
41293210
EK
4312application may very well work close to its memory limit for the heap space for
4313long periods during the benchmark.
8fe94c0b 4314
81cf9554
EK
4315\subsection{Candidates stored as mementos}
4316When performing large scale analysis of source code for finding candidates to
4317the \ExtractAndMoveMethod refactoring, memory is an issue. One of the inputs to
4318the refactoring is a variable binding. This variable binding indirectly retains
4319a whole AST. Since ASTs are large structures, this quickly leads to an
4320\type{OutOfMemoryError} if trying to analyze a large project without optimizing
4321how we store the candidates data. This means that the JVM cannot allocate more
4322memory for out benchmark, and it exists disgracefully.
4323
4324A possible solution could be to just allow the JVM to allocate even more memory,
4325but this is not a dependable solution. The allocated memory could easily
4326supersede the physical memory of a machine, and that would make the benchmark go
4327really slow.
4328
4329Thus, the candidates data must be stored in another format. Therefore, we use
4330the \gloss{mementoPattern} to store the variable binding information. This is
4331done in a way that makes it possible to retrieve the variable binding at a later
4332point. The data that is stored to achieve this, is the key to the original
4333variable binding. In addition to the key, we know which method and text
4334selection the variable is referenced in, so that we can find it by parsing the
4335source code and search for it when it is needed.
356782a0 4336
98660ec0
EK
4337\section{Handling failures}
4338\todoin{write}
4339
a7514fbd
EK
4340
4341\chapter{Technicalities}
4342
4343\section{Source code organization}
60065669 4344All the parts of this master's project is under version control with
a7514fbd
EK
4345\name{Git}\footnote{\url{http://git-scm.com/}}.
4346
4347The software written is organized as some \name{Eclipse} plugins. Writing a plugin is
4348the natural way to utilize the API of \name{Eclipse}. This also makes it possible to
4349provide a user interface to manually run operations on selections in program
4350source code or whole projects/packages.
4351
4352When writing a plugin in \name{Eclipse}, one has access to resources such as the
4353current workspace, the open editor and the current selection.
4354
4355The thesis work is contained in the following Eclipse projects:
4356
4357\begin{description}
4358 \item[no.uio.ifi.refaktor] \hfill \\ This is the main Eclipse plugin
4359 project, and contains all of the business logic for the plugin.
4360
4361 \item[no.uio.ifi.refaktor.tests] \hfill \\
4362 This project contains the tests for the main plugin.
4363
4364 \item[no.uio.ifi.refaktor.examples] \hfill \\
4365 Contains example code used in testing. It also contains code for managing
4366 this example code, such as creating an Eclipse project from it before a test
4367 run.
4368
4369 \item[no.uio.ifi.refaktor.benchmark] \hfill \\
4370 This project contains code for running search based versions of the
4371 composite refactoring over selected Eclipse projects.
4372
4373 \item[no.uio.ifi.refaktor.releng] \hfill \\
4374 Contains the rmap, queries and target definitions needed by by Buckminster
4375 on the Jenkins continuous integration server.
4376
4377\end{description}
4378
4379\subsection{The no.uio.ifi.refaktor project}
4380
4381\subsubsection{no.uio.ifi.refaktor.analyze}
e36eade0
EK
4382This package, and its sub-packages, contains code that is used for analyzing
4383Java source code. The most important sub-packages are presented below.
a7514fbd
EK
4384
4385\begin{description}
4386 \item[no.uio.ifi.refaktor.analyze.analyzers] \hfill \\
4387 This package contains source code analyzers. These are usually responsible
4388 for analyzing text selections or running specialized analyzers for different
4389 kinds of entities. Their structure are often hierarchical. This means that
4390 you have an analyzer for text selections, that in turn is utilized by an
4391 analyzer that analyzes all the selections of a method. Then there are
4392 analyzers for analyzing all the methods of a type, all the types of a
4393 compilation unit, all the compilation units of a package, and, at last, all
4394 of the packages in a project.
4395
4396 \item[no.uio.ifi.refaktor.analyze.checkers] \hfill \\
4397 A package containing checkers. The checkers are classes used to validate
4398 that a selection can be further analyzed and chosen as a candidate for a
4399 refactoring. Invalidating properties can be such as usage of inner classes
4400 or the need for multiple return values.
4401
4402 \item[no.uio.ifi.refaktor.analyze.collectors] \hfill \\
4403 This package contains the property collectors. Collectors are used to gather
4404 properties from a text selection. This is mostly properties regarding
4405 referenced names and their occurrences. It is these properties that makes up
4406 the basis for finding the best candidates for a refactoring.
4407\end{description}
4408
4409\subsubsection{no.uio.ifi.refaktor.change}
e36eade0 4410This package, and its sub-packages, contains functionality for manipulate source
a7514fbd
EK
4411code.
4412
4413\begin{description}
4414 \item[no.uio.ifi.refaktor.change.changers] \hfill \\
4415 This package contains source code changers. They are used to glue together
4416 the analysis of source code and the actual execution of the changes.
4417
4418 \item[no.uio.ifi.refaktor.change.executors] \hfill \\
4419 The executors that are responsible for making concrete changes are found in
4420 this package. They are mostly used to create and execute one or more Eclipse
4421 refactorings.
4422
4423 \item[no.uio.ifi.refaktor.change.processors] \hfill \\
4424 Contains a refactoring processor for the \MoveMethod refactoring. The code
4425 is stolen and modified to fix a bug. The related bug is described in
4426 \myref{eclipse_bug_429416}.
4427
4428\end{description}
4429
4430\subsubsection{no.uio.ifi.refaktor.handlers}
4431This package contains handlers for the commands defined in the plugin manifest.
4432
4433\subsubsection{no.uio.ifi.refaktor.prefix}
4434This package contains the \type{Prefix} type that is the data representation of
4435the prefixes found by the \type{PrefixesCollector}. It also contains the prefix
4436set for storing and working with prefixes.
4437
4438\subsubsection{no.uio.ifi.refaktor.statistics}
4439The package contains statistics functionality. Its heart is the statistics
4440aspect that is responsible for gathering statistics during the execution of the
4441\ExtractAndMoveMethod refactoring.
4442
4443\begin{description}
4444 \item[no.uio.ifi.refaktor.statistics.reports] \hfill \\
4445 This package contains a simple framework for generating reports from the
4446 statistics data generated by the aspect. Currently, the only available
4447 report type is a simple text report.
4448
4449\end{description}
4450
4451
4452\subsubsection{no.uio.ifi.refaktor.textselection}
4453This package contains the two custom text selections that are used extensively
4454throughout the project. One of them is just a subclass of the other, to support
4455the use of the memento pattern to optimize the memory usage during benchmarking.
4456
4457\subsubsection{no.uio.ifi.refaktor.debugging}
4458The package contains a debug utility class. I addition to this, the package
4459\code{no.uio.ifi.refaktor.utils.aspects} contains a couple of aspects used for
4460debugging purposes.
4461
4462\subsubsection{no.uio.ifi.refaktor.utils}
4463Utility package that contains all the functionality that has to do with parsing
4464of source code. It also has utility classes for looking up handles to methods
4465and types et cetera.
4466
4467\begin{description}
4468 \item[no.uio.ifi.refaktor.utils.caching] \hfill \\
4469 This package contains the caching manager for compilation units, along with
4470 classes for different caching strategies.
4471
4472 \item[no.uio.ifi.refaktor.utils.nullobjects] \hfill \\
4473 Contains classes for creating different null objects. Most of the classes is
4474 used to represent null objects of different handle types. These null objects
4475 are returned from various utility classes instead of returning a \var{null}
4476 value when other values are not available.
4477
4478\end{description}
4479
4480\section{Continuous integration}
4481The continuous integration server
4482\name{Jenkins}\footnote{\url{http://jenkins-ci.org/}} has been set up for the
4483project\footnote{A work mostly done by the supervisor.}. It is used as a way to
4484run tests and perform code coverage analysis.
4485
4486To be able to build the \name{Eclipse} plugins and run tests for them with Jenkins, the
4487component assembly project
4488\name{Buckminster}\footnote{\url{http://www.eclipse.org/buckminster/}} is used,
4489through its plugin for Jenkins. Buckminster provides for a way to specify the
4490resources needed for building a project and where and how to find them.
4491Buckminster also handles the setup of a target environment to run the tests in.
4492All this is needed because the code to build depends on an \name{Eclipse}
4493installation with various plugins.
4494
4495\subsection{Problems with AspectJ}
4496The Buckminster build worked fine until introducing AspectJ into the project.
4497When building projects using AspectJ, there are some additional steps that needs
4498to be performed. First of all, the aspects themselves must be compiled. Then the
4499aspects needs to be woven with the classes they affect. This demands a process
4500that does multiple passes over the source code.
4501
4502When using AspectJ with \name{Eclipse}, the specialized compilation and the
4503weaving can be handled by the \name{AspectJ Development
4504Tools}\footnote{\url{https://www.eclipse.org/ajdt/}}. This works all fine, but
4505it complicates things when trying to build a project depending on \name{Eclipse}
4506plugins outside of \name{Eclipse}. There is supposed to be a way to specify a
4507compiler adapter for javac, together with the file extensions for the file types
4508it shall operate. The AspectJ compiler adapter is called
4509\typewithref{org.aspectj.tools.ant.taskdefs}{Ajc11CompilerAdapter}, and it works
4510with files that has the extensions \code{*.java} and \code{*.aj}. I tried to
4511setup this in the build properties file for the project containing the aspects,
4512but to no avail. The project containing the aspects does not seem to be built at
4513all, and the projects that depends on it complains that they cannot find certain
4514classes.
4515
4516I then managed to write an \name{Ant}\footnote{\url{https://ant.apache.org/}}
4517build file that utilizes the AspectJ compiler adapter, for the
4518\code{no.uio.ifi.refaktor} plugin. The problem was then that it could no longer
4519take advantage of the environment set up by Buckminster. The solution to this
4520particular problem was of a ``hacky'' nature. It involves exporting the plugin
4521dependencies for the project to an Ant build file, and copy the exported path
4522into the existing build script. But then the Ant script needs to know where the
4523local \name{Eclipse} installation is located. This is no problem when building
4524on a local machine, but to utilize the setup done by Buckminster is a problem
4525still unsolved. To get the classpath for the build setup correctly, and here
4526comes the most ``hacky'' part of the solution, the Ant script has a target for
4527copying the classpath elements into a directory relative to the project
4528directory and checking it into Git. When no \code{ECLIPSE\_HOME} property is set
4529while running Ant, the script uses the copied plugins instead of the ones
4530provided by the \name{Eclipse} installation when building the project. This
4531obviously creates some problems with maintaining the list of dependencies in the
4532Ant file, as well as remembering to copy the plugins every time the list of
4533dependencies change.
4534
4535The Ant script described above is run by Jenkins before the Buckminster setup
4536and build. When setup like this, the Buckminster build succeeds for the projects
4537not using AspectJ, and the tests are run as normal. This is all good, but it
4538feels a little scary, since the reason for Buckminster not working with AspectJ
4539is still unknown.
4540
4541The problems with building with AspectJ on the Jenkins server lasted for a
4542while, before they were solved. This is reflected in the ``Test Result Trend''
4543and ``Code Coverage Trend'' reported by Jenkins.
4544
4545
e1d6ae87
EK
4546\chapter{Methodology}
4547
4548\section{Evolutionary design}
4549In the programming work for this project, it have tried to use a design strategy
4928aa0b
EK
4550called evolutionary design, also known as continuous or incremental
4551design\citing{wiki_continuous_2014}. It is a software design strategy
4552advocated by the Extreme Programming community. The essence of the strategy is
4553that you should let the design of your program evolve naturally as your
4554requirements change. This is seen in contrast with up-front design, where
4555design decisions are made early in the process.
e1d6ae87
EK
4556
4557The motivation behind evolutionary design is to keep the design of software as
4558simple as possible. This means not introducing unneeded functionality into a
4559program. You should defer introducing flexibility into your software, until it
4560is needed to be able to add functionality in a clean way.
4561
4562Holding up design decisions, implies that the time will eventually come when
4563decisions have to be made. The flexibility of the design then relies on the
4564programmer's abilities to perform the necessary refactoring, and \his confidence
4565in those abilities. From my experience working on this project, I can say that
4566this confidence is greatly enhanced by having automated tests to rely on
4567\see{tdd}.
4568
4569The choice of going for evolutionary design developed naturally. As Fowler
4570points out in his article \tit{Is Design Dead?}, evolutionary design much
4571resembles the ``code and fix'' development strategy\citing{fowler_design_2004}.
4572A strategy that most of us have practiced in school. This was also the case when
4573I first started this work. I had to learn the inner workings of Eclipse and its
4574refactoring-related plugins. That meant a lot of fumbling around with code I did
4575not know, in a trial and error fashion. Eventually I started writing tests for
4576my code, and my design began to evolve.
4577
4928aa0b
EK
4578\section{Test-driven development}\label{tdd}
4579As mentioned before, the project started out as a classic code and fix
4580developmen process. My focus was aimed at getting something to work, rather than
4581doing so according to best practice. This resulted in a project that got out of
4582its starting blocks, but it was not accompanied by any tests. Hence it was soon
4583difficult to make any code changes with the confidence that the program was
4584still correct afterwards (assuming it was so before changing it). I always knew
4585that I had to introduce some tests at one point, but this experience accelerated
4586the process of leading me onto the path of testing.
4587
4588I then wrote tests for the core functionality of the plugin, and thus gained
4589more confidence in the correctness of my code. I could now perform quite drastic
4590changes without ``wetting my pants``. After this, nearly all of the semantic
4591changes done to the business logic of the project, or the addition of new
4592functionality, was made in a test-driven manner. This means that before
4593performing any changes, I would define the desired functionality through a set
4594of tests. I would then run the tests to check that they were run and that they
4595did not pass. Then I would do any code changes necessary to make the tests
4596pass. The definition of how the program is supposed to operate is then captured
4597by the tests. However, this does not prove the correctness of the analysis
4598leading to the test definitions.
4599
4600\section{Continuous integration}
4601\todoin{???}
4602
4603
0cbeeadb
EK
4604\chapter{Conclusions and Future Work}
4605\todoin{Write}
4606
4607\section{Future work}
4608
4609\chapter{Related Work}
4610
4611\section{Safer refactorings}
4612\todoin{write}
4613
4614\section{The compositional paradigm of refactoring}
4615This paradigm builds upon the observation of Vakilian et
4616al.\citing{vakilian2012}, that of the many automated refactorings existing in
4617modern IDEs, the simplest ones are dominating the usage statistics. The report
4618mainly focuses on \name{Eclipse} as the tool under investigation.
4619
4620The paradigm is described almost as the opposite of automated composition of
4621refactorings \see{compositeRefactorings}. It works by providing the programmer
4622with easily accessible primitive refactorings. These refactorings shall be
4623accessed via keyboard shortcuts or quick-assist menus\footnote{Think
4624quick-assist with Ctrl+1 in \name{Eclipse}} and be promptly executed, opposed to in the
4625currently dominating wizard-based refactoring paradigm. They are meant to
4626stimulate composing smaller refactorings into more complex changes, rather than
4627doing a large upfront configuration of a wizard-based refactoring, before
4628previewing and executing it. The compositional paradigm of refactoring is
4629supposed to give control back to the programmer, by supporting \himher with an
4630option of performing small rapid changes instead of large changes with a lesser
4631degree of control. The report authors hope this will lead to fewer unsuccessful
4632refactorings. It also could lower the bar for understanding the steps of a
4633larger composite refactoring and thus also help in figuring out what goes wrong
4634if one should choose to op in on a wizard-based refactoring.
4635
4636Vakilian and his associates have performed a survey of the effectiveness of the
4637compositional paradigm versus the wizard-based one. They claim to have found
4638evidence of that the \emph{compositional paradigm} outperforms the
4639\emph{wizard-based}. It does so by reducing automation, which seem
4640counterintuitive. Therefore they ask the question ``What is an appropriate level
4641of automation?'', and thus questions what they feel is a rush toward more
4642automation in the software engineering community.
4643
4644
4645
4646\appendix
4647
4648
3727b75b 4649\chapter{Eclipse Bugs Found}
540ca7e4
EK
4650\newcommand{\submittedBugReport}[1]{The submitted bug report can be found on
4651 \url{#1}.}
94bb49f0
EK
4652
4653\section{Eclipse bug 420726: Code is broken when moving a method that is
0f6e45f8
EK
4654assigning to the parameter that is also the move
4655destination}\label{eclipse_bug_420726}
540ca7e4 4656This bug
94bb49f0 4657was found when analyzing what kinds of names that was to be considered as
3727b75b 4658\emph{unfixes} \see{unfixes}.
94bb49f0
EK
4659
4660\subsection{The bug}
4661The bug emerges when trying to move a method from one class to another, and when
4662the target for the move (must be a variable, local or field) is both a parameter
fe0a4c48 4663variable and also is assigned to within the method body. \name{Eclipse} allows this to
94bb49f0
EK
4664happen, although it is the sure path to a compilation error. This is because we
4665would then have an assignment to a \var{this} expression, which is not allowed
540ca7e4
EK
4666in Java.
4667\submittedBugReport{https://bugs.eclipse.org/bugs/show\_bug.cgi?id=420726}
94bb49f0
EK
4668
4669\subsection{The solution}
4670The solution to this problem is to add all simple names that are assigned to in
4671a method body to the set of unfixes.
128adb4f 4672
f1b6174d
EK
4673\section{Eclipse bug 429416: IAE when moving method from anonymous
4674class}\label{eclipse_bug_429416}
540ca7e4 4675I discovered
128adb4f
EK
4676this bug during a batch change on the \type{org.eclipse.jdt.ui} project.
4677
4678\subsection{The bug}
fe0a4c48 4679This bug surfaces when trying to use the \refa{Move Method} refactoring to move a
94bb49f0 4680method from an anonymous class to another class. This happens both for my
fe0a4c48
EK
4681simulation as well as in \name{Eclipse}, through the user interface. It only occurs
4682when \name{Eclipse} analyzes the program and finds it necessary to pass an instance of
94bb49f0 4683the originating class as a parameter to the moved method. I.e. it want to pass a
128adb4f
EK
4684\var{this} expression. The execution ends in an
4685\typewithref{java.lang}{IllegalArgumentException} in
4686\typewithref{org.eclipse.jdt.core.dom}{SimpleName} and its
4687\method{setIdentifier(String)} method. The simple name is attempted created in
4688the method
4689\methodwithref{org.eclipse.jdt.internal.corext.refactoring.structure.\\MoveInstanceMethodProcessor}{createInlinedMethodInvocation}
4690so the \type{MoveInstanceMethodProcessor} was early a clear suspect.
4691
4692The \method{createInlinedMethodInvocation} is the method that creates a method
4693invocation where the previous invocation to the method that was moved was. From
4694its code it can be read that when a \var{this} expression is going to be passed
4695in to the invocation, it shall be qualified with the name of the original
3ab3e132 4696method's declaring class, if the declaring class is either an anonymous class or
128adb4f
EK
4697a member class. The problem with this, is that an anonymous class does not have
4698a name, hence the term \emph{anonymous} class! Therefore, when its name, an
4699empty string, is passed into
4700\methodwithref{org.eclipse.jdt.core.dom.AST}{newSimpleName} it all ends in an
540ca7e4
EK
4701\type{IllegalArgumentException}.
4702\submittedBugReport{https://bugs.eclipse.org/bugs/show\_bug.cgi?id=429416}
128adb4f
EK
4703
4704\subsection{How I solved the problem}
4705Since the \type{MoveInstanceMethodProcessor} is instantiated in the
4706\typewithref{no.uio.ifi.refaktor.change.executors}{MoveMethod\-RefactoringExecutor},
4707and only need to be a
4708\typewithref{org.eclipse.ltk.core.refactoring.participants}{MoveProcessor}, I
4709was able to copy the code for the original move processor and modify it so that
4710it works better for me. It is now called
f1b6174d 4711\typewithref{no.uio.ifi.refaktor.change.processors}{ModifiedMoveInstanceMethodProcessor}.
128adb4f
EK
4712The only modification done (in addition to some imports and suppression of
4713warnings), is in the \method{createInlinedMethodInvocation}. When the declaring
4714class of the method to move is anonymous, the \var{this} expression in the
4715parameter list is not qualified with the declaring class' (empty) name.
4716
3727b75b
EK
4717\section{Eclipse bug 429954: Extracting statement with reference to local type
4718breaks code}\label{eclipse_bug_429954}
540ca7e4 4719The bug
a6415293 4720was discovered when doing some changes to the way unfixes is computed.
3727b75b
EK
4721
4722\subsection{The bug}
fe0a4c48 4723The problem is that \name{Eclipse} is allowing selections that references variables of
3727b75b
EK
4724local types to be extracted. When this happens the code is broken, since the
4725extracted method must take a parameter of a local type that is not in the
4726methods scope. The problem is illustrated in
540ca7e4
EK
4727\myref{lst:extractMethod_LocalClass}, but there in another setting.
4728\submittedBugReport{https://bugs.eclipse.org/bugs/show\_bug.cgi?id=429954}
3727b75b
EK
4729
4730\subsection{Actions taken}
4731There are no actions directly springing out of this bug, since the Extract
a6415293 4732Method refactoring cannot be meant to be this way. This is handled on the
fe0a4c48 4733analysis stage of our \refa{Extract and Move Method} refactoring. So names representing
3727b75b
EK
4734variables of local types is considered unfixes \see{unfixes}.
4735\todoin{write more when fixing this in legal statements checker}
9ff90080 4736\backmatter{}
fe0a4c48 4737\printglossaries
9ff90080 4738\printbibliography
055dca93 4739\listoftodos
9ff90080 4740\end{document}