1 \documentclass[USenglish,11pt]{ifimaster}
3 \usepackage[utf8]{inputenc}
4 \usepackage[T1]{fontenc,url}
5 \usepackage{lmodern} % using Latin Modern to be able to use bold typewriter font
11 \usepackage{tikz-qtree}
12 \usetikzlibrary{shapes,snakes,trees,arrows,shadows,positioning,calc}
13 \usepackage{babel,textcomp,csquotes,ifikompendiumforside}
16 \usepackage[hidelinks]{hyperref}
18 \usepackage[xindy,entrycounter]{glossaries}
20 \usepackage[style=alphabetic,backend=biber]{biblatex}
22 \usepackage{mathtools}
24 % use 'disable' before printing:
25 \usepackage[disable]{todonotes}
32 \usepackage{perpage} %the perpage package
33 \MakePerPage{footnote} %the perpage package command
% Theorem-like environments, both declared with the starred (unnumbered) form
% under the "definition" theorem style. wordDef is what \definition wraps.
35 \theoremstyle{definition}
36 \newtheorem*{wordDef}{Definition}
37 \newtheorem*{theorem}{Theorem}
39 \graphicspath{ {./figures/} }
% \citing{key}: citation preceded by a non-breaking space, so it can be
% written directly after the word it belongs to.
41 \newcommand{\citing}[1]{~\cite{#1}}
% \myref{label}: cross-reference helper; currently delegates to \vref.
% An alternative based on \cref/\cpageref is kept commented out.
42 %\newcommand{\myref}[1]{\cref{#1} on \cpageref{#1}}
43 \newcommand{\myref}[1]{\vref{#1}}
% \glossref{label}: superscripted, parenthesized \glsrefentry reference to a
% glossary entry.
45 \newcommand{\glossref}[1]{\textsuperscript{(\glsrefentry{#1})}}
% \gloss / \glosspl: singular and plural glossary terms. The active versions
% are plain \gls / \glspl; the commented-out versions additionally appended
% \glossref to each use.
46 %\newcommand{\gloss}[1]{\gls{#1}\glossref{#1}}
47 %\newcommand{\glosspl}[1]{\glspl{#1}\glossref{#1}}
48 \newcommand{\gloss}[1]{\gls{#1}}
49 \newcommand{\glosspl}[1]{\glspl{#1}}
% \definition{text}: typesets text inside the wordDef environment.
51 \newcommand{\definition}[1]{\begin{wordDef}#1\end{wordDef}}
% \see{label}: parenthesized "(see <reference>)" built on \myref.
52 \newcommand{\see}[1]{(see \myref{#1})}
% \explanation{name}{when}{how}: unindented catalogue entry. #1 is set in bold
% italics, followed by "When:" #2 and "How:" #3, each on its own line. The
% starred \\* forbids a page break at each line break; the final \\*[-7px]
% pulls the following material up by 7px.
% NOTE: the line break after \emph{When:} is inside the macro body and acts as
% the space before #2; do not comment it out.
53 \newcommand{\explanation}[3]{\noindent\textbf{\textit{#1}}\\*\emph{When:}
54 #2\\*\emph{How:} #3\\*[-7px]}
56 %\newcommand{\type}[1]{\lstinline{#1}}
% Inline code markup. \code sets its argument in bold typewriter; \type,
% \method and \var are semantic aliases that all resolve to \code.
57 \newcommand{\code}[1]{\texttt{\textbf{#1}}}
58 \newcommand{\type}[1]{\code{#1}}
% \typeref{T}: footnote containing T formatted as a type.
59 \newcommand{\typeref}[1]{\footnote{\type{#1}}}
% \typewithref{prefix}{T}: prints T and adds a footnote with "prefix.T".
60 \newcommand{\typewithref}[2]{\type{#2}\typeref{#1.#2}}
61 \newcommand{\method}[1]{\type{#1}}
% \methodref{T}{m}: footnote of the form "T#m()".
62 \newcommand{\methodref}[2]{\footnote{\type{#1}\method{\##2()}}}
% \methodwithref{T}{m}: prints m and adds the \methodref footnote. It reuses
% \methodref (as \typewithref reuses \typeref) rather than repeating its body,
% so the footnote format is defined in one place only.
63 \newcommand{\methodwithref}[2]{\method{#2}\methodref{#1}{#2}}
64 \newcommand{\var}[1]{\type{#1}}
% Semantic markup for names and terms. \name leaves its argument unchanged;
% \tit (titles), \refa (refactoring names), \pattern (design patterns) and
% \metr all emphasize their argument via \emph.
66 \newcommand{\name}[1]{#1}
67 \newcommand{\tit}[1]{\emph{#1}}
68 \newcommand{\refa}[1]{\emph{#1}}
69 \newcommand{\pattern}[1]{\emph{#1}}
70 \newcommand{\metr}[1]{\emph{#1}}
% Shorthands for frequently used refactoring names, each ending in \xspace.
% NOTE(review): \xspace needs the xspace package; its loading is not visible
% in this part of the preamble -- confirm.
71 \newcommand{\ExtractMethod}{\refa{Extract Method}\xspace}
72 \newcommand{\MoveMethod}{\refa{Move Method}\xspace}
73 \newcommand{\ExtractAndMoveMethod}{\refa{Extract and Move Method}\xspace}
% \todoin[options]{text}: inline \todo note. The text is passed as the note's
% caption and also as its body, wrapped in a minipage that is 4pt narrower
% than \textwidth. Extra \todo options can be given in the optional argument.
% NOTE(review): the length expression \textwidth-4pt presumably relies on the
% calc package -- confirm that it is loaded.
75 \newcommand\todoin[2][]{\todo[inline, caption={#2}, #1]{
76 \begin{minipage}{\textwidth-4pt}#2\end{minipage}}}
80 \author{Erlend Kristiansen}
84 \newglossaryentry{profiling}
87 description={is to run a computer program through a profiler/with a profiler
88 attached. A profiler is a program for analyzing performance within an
89 application. It is used to analyze memory consumption, processing time and
90 frequency of procedure calls and such},
93 \newglossaryentry{profiler}
96 description={A profiler is a program for analyzing performance within an
97 application. It is used to analyze memory consumption, processing time and
98 frequency of procedure calls and such.}
100 \newglossaryentry{xUnit}
102 name={xUnit framework},
103 description={An xUnit framework is a framework for writing unit tests for a
104 computer program. It follows the patterns known from the JUnit framework for
105 Java\citing{fowlerXunit}
107 plural={xUnit frameworks}
109 \newglossaryentry{softwareObfuscation}
111 name={software obfuscation},
112 description={makes source code harder to read and analyze, while preserving
115 \newglossaryentry{extractClass}
117 name=\refa{Extract Class},
118 description={The \refa{Extract Class} refactoring works by creating a class,
119 for then to move members from another class to that class and access them from
120 the old class via a reference to the new class}
122 \newglossaryentry{designPattern}
124 name={design pattern},
125 description={A design pattern is a named abstraction, that is meant to solve a
126 general design problem. It describes the key aspects of a common problem and
127 identifies its participators and how they collaborate},
128 plural={design patterns}
130 \newglossaryentry{extractMethod}
132 name=\refa{Extract Method},
133 description={The \refa{Extract Method} refactoring is used to extract a
134 fragment of code from its context and into a new method. A call to the new
135 method is inlined where the fragment was before. It is used to break code into
136 logical units, with names that explain their purpose}
138 \newglossaryentry{moveMethod}
140 name=\refa{Move Method},
141 description={The \refa{Move Method} refactoring is used to move a method from
142 one class to another. This is useful if the method is using more features of
143 another class than of the class which it is currently defined. Then all calls
144 to this method must be updated, or the method must be copied, with the old
145 method delegating to the new method}
% Bibliography database for biblatex. \addbibresource replaces the legacy
% \bibliography command; unlike \bibliography it requires the full file name,
% including the .bib extension.
148 \addbibresource{bibliography/master-thesis-erlenkr-bibliography.bib}
150 % UML comment in TikZ:
151 % ref: https://tex.stackexchange.com/questions/103688/folded-paper-shape-tikz
153 \pgfdeclareshape{umlcomment}{
154 \inheritsavedanchors[from=rectangle] % this is nearly a rectangle
155 \inheritanchorborder[from=rectangle]
156 \inheritanchor[from=rectangle]{center}
157 \inheritanchor[from=rectangle]{north}
158 \inheritanchor[from=rectangle]{south}
159 \inheritanchor[from=rectangle]{west}
160 \inheritanchor[from=rectangle]{east}
161 % ... and possibly more
162 \backgroundpath{% this is new
163 % store lower left in xa/ya and upper right in xb/yb
164 \southwest \pgf@xa=\pgf@x \pgf@ya=\pgf@y
165 \northeast \pgf@xb=\pgf@x \pgf@yb=\pgf@y
166 % compute corner of ``flipped page''
167 \pgf@xc=\pgf@xb \advance\pgf@xc by-10pt % this should be a parameter
168 \pgf@yc=\pgf@yb \advance\pgf@yc by-10pt
169 % construct main path
170 \pgfpathmoveto{\pgfpoint{\pgf@xa}{\pgf@ya}}
171 \pgfpathlineto{\pgfpoint{\pgf@xa}{\pgf@yb}}
172 \pgfpathlineto{\pgfpoint{\pgf@xc}{\pgf@yb}}
173 \pgfpathlineto{\pgfpoint{\pgf@xb}{\pgf@yc}}
174 \pgfpathlineto{\pgfpoint{\pgf@xb}{\pgf@ya}}
177 \pgfpathmoveto{\pgfpoint{\pgf@xc}{\pgf@yb}}
178 \pgfpathlineto{\pgfpoint{\pgf@xc}{\pgf@yc}}
179 \pgfpathlineto{\pgfpoint{\pgf@xb}{\pgf@yc}}
180 \pgfpathlineto{\pgfpoint{\pgf@xc}{\pgf@yc}}
185 \tikzstyle{comment}=[%
198 %\interfootnotelinepenalty=10000
% Print section numbers as the bare section counter in arabic digits.
199 \renewcommand{\thesection}{\arabic{section}}
% Use roman page numbers from here on.
202 \pagenumbering{roman}
208 \chapter*{What is Refactoring?}
210 This question is best answered by first defining the concept of a
211 \emph{refactoring}, what it is to \emph{refactor}, and then discuss what aspects
212 of programming make people want to refactor their code.
214 \section{Defining refactoring}
215 Martin Fowler, in his classic book on refactoring\citing{refactoring}, defines a
216 refactoring like this:
219 \emph{Refactoring} (noun): a change made to the internal
220 structure\footnote{The structure observable by the programmer.} of software to
221 make it easier to understand and cheaper to modify without changing its
222 observable behavior.~\cite[p.~53]{refactoring}
225 \noindent This definition assigns additional meaning to the word
226 \emph{refactoring}, beyond the composition of the prefix \emph{re-}, usually
227 meaning something like ``again'' or ``anew'', and the word \emph{factoring},
228 that can mean to isolate the \emph{factors} of something. Here a \emph{factor}
229 would be close to the mathematical definition of something that divides a
230 quantity, without leaving a remainder. Fowler is mixing the \emph{motivation}
231 behind refactoring into his definition. Instead it could be more refined, formed
232 to only consider the \emph{mechanical} and \emph{behavioral} aspects of
233 refactoring. That is to factor the program again, putting it together in a
234 different way than before, while preserving the behavior of the program. An
235 alternative definition could then be:
237 \definition{A \emph{refactoring} is a transformation
238 done to a program without altering its external behavior.}
240 From this we can conclude that a refactoring primarily changes how the
241 \emph{code} of a program is perceived by the \emph{programmer}, and not the
242 \emph{behavior} experienced by any user of the program. Although the logical
243 meaning is preserved, such changes could potentially alter the program's
244 behavior when it comes to performance gains or penalties. So any logic depending
245 on the performance of a program could make the program behave differently after
248 In the extreme case one could argue that \gloss{softwareObfuscation} is
249 refactoring. It is often used to protect proprietary software. It restrains
250 uninvited viewers, so they have a hard time analyzing code that they are not
251 supposed to understand. This could be a problem when using a language that
252 is possible to decompile, such as Java.
254 Obfuscation could be done by composing many, more or less randomly chosen,
255 refactorings. Then the question arises whether it can be called a
256 \emph{composite refactoring} or not \see{compositeRefactorings}? The answer is
257 not obvious. First, there is no way to describe the mechanics of software
258 obfuscation, because there are infinitely many ways to do that. Second,
259 obfuscation can be thought of as \emph{one operation}: Either the code is
260 obfuscated, or it is not. Third, it makes no sense to call software obfuscation
261 \emph{a refactoring}, since it holds different meaning to different people.
263 This last point is important, since one of the motivations behind defining
264 different refactorings, is to establish a \emph{vocabulary} for software
265 professionals to use when reasoning about and discussing programs, similar to
266 the motivation behind \glosspl{designPattern}\citing{designPatterns}.
268 So for describing \emph{software obfuscation}, it might be more appropriate to
269 define what you do when performing it rather than precisely defining its
270 mechanics in terms of other refactorings.
273 \section{The etymology of `refactoring'}
274 It is a little difficult to pinpoint the exact origin of the word
275 ``refactoring'', as it seems to have evolved as part of a colloquial
276 terminology, more than a scientific term. There is no authoritative source for a
277 formal definition of it.
279 According to Martin Fowler\citing{etymology-refactoring}, there may also be more
280 than one origin of the word. The most well-known source, when it comes to the
281 origin of \emph{refactoring}, is the
282 Smalltalk\footnote{\label{footNote}Programming language} community and their
283 infamous \name{Refactoring
284 Browser}\footnote{\url{http://st-www.cs.illinois.edu/users/brant/Refactory/RefactoringBrowser.html}}
285 described in the article \tit{A Refactoring Tool for
286 Smalltalk}\citing{refactoringBrowser1997}, published in 1997.
287 Allegedly\citing{etymology-refactoring}, the metaphor of factoring programs was
288 also present in the Forth\textsuperscript{\ref{footNote}} community, and the
289 word ``refactoring'' is mentioned in a book by Leo Brodie, called \tit{Thinking
290 Forth}\citing{brodie2004}, first published in 1984\footnote{\tit{Thinking Forth}
291 was first published in 1984 by the \name{Forth Interest Group}. Then it was
292 reprinted in 1994 with minor typographical corrections, before it was
293 transcribed into an electronic edition typeset in \LaTeX\ and published under a
294 Creative Commons licence in
295 2004. The edition cited here is the 2004 edition, but the content should
296 essentially be as in 1984.}. The exact word is only printed in one
297 place~\cite[p.~232]{brodie2004}, but the term \emph{factoring} is prominent in
298 the book, which also contains a whole chapter dedicated to (re)factoring, and how
299 to keep the (Forth) code clean and maintainable.
302 \ldots good factoring technique is perhaps the most important skill for a
303 Forth programmer.~\cite[p.~172]{brodie2004}
306 \noindent Brodie also expresses what \emph{factoring} means to him:
309 Factoring means organizing code into useful fragments. To make a fragment
310 useful, you often must separate reusable parts from non-reusable parts. The
311 reusable parts become new definitions. The non-reusable parts become arguments
312 or parameters to the definitions.~\cite[p.~172]{brodie2004}
315 Fowler claims that the usage of the word \emph{refactoring} did not pass between
316 the \name{Forth} and \name{Smalltalk} communities, but that it emerged
317 independently in each of the communities.
319 \section{Motivation -- Why people refactor}
320 There are many reasons why people want to refactor their programs. They can for
321 instance do it to remove duplication, break up long methods or to introduce
322 design patterns into their software systems. The shared trait of all these is
323 that people's intentions are to make their programs \emph{better}, in some
324 sense. But what aspects of their programs are becoming improved?
326 As just mentioned, people often refactor to get rid of duplication. They are
327 moving identical or similar code into methods, and are pushing methods up or
328 down in their class hierarchies. They are making template methods for
329 overlapping algorithms/functionality, and so on. It is all about gathering what
330 belongs together and putting it all in one place. The resulting code is then
331 easier to maintain. When removing the implicit coupling\footnote{When
332 duplicating code, the duplicate pieces of code might not be coupled, apart
333 from representing the same functionality. So if this functionality is going to
334 change, it might need to change in more than one place, thus creating an
335 implicit coupling between multiple pieces of code.} between code snippets, the
336 location of a bug is limited to only one place, and new functionality need only
337 to be added to this one place, instead of a number of places people might not
340 A problem you often encounter when programming, is that a program contains a lot
341 of long and hard-to-grasp methods. It can then help to break the methods into
342 smaller ones, using the \gloss{extractMethod} refactoring\citing{refactoring}.
343 Then you may discover something about a program that you were not aware of
344 before; revealing bugs you did not know about or could not find due to the
345 complex structure of your program. \todo{Proof?} Making the methods smaller and
346 giving good names to the new ones clarifies the algorithms and enhances the
347 \emph{understandability} of the program \see{magic_number_seven}. This makes
348 refactoring an excellent method for exploring unknown program code, or code that
349 you had forgotten that you wrote.
351 Most primitive refactorings are simple, and usually involve moving code
352 around\citing{kerievsky2005}. The motivation behind them may first be revealed
353 when they are combined into larger --- higher level --- refactorings, called
354 \emph{composite refactorings} \see{compositeRefactorings}. Often the goal of
355 such a series of refactorings is a design pattern. Thus the design can
356 \emph{evolve} throughout the lifetime of a program, as opposed to designing
357 up-front. It is all about being structured and taking small steps to improve a
360 Many software design patterns are aimed at lowering the coupling between
361 different classes and different layers of logic. One of the most famous is
362 perhaps the \pattern{Model-View-Controller}\citing{designPatterns} pattern. It
363 is aimed at lowering the coupling between the user interface, the business logic
364 and the data representation of a program. This also has the added benefit that
365 the business logic could much more easily be the target of automated tests, thus
366 increasing the productivity in the software development process.
368 Another effect of refactoring is that with the increased separation of concerns
369 coming out of many refactorings, the \emph{performance} can be improved. When
370 profiling programs, the problematic parts are narrowed down to smaller parts of
371 the code, which are easier to tune, and optimization can be performed only where
372 needed and in a more effective way\citing{refactoring}.
374 Last, but not least, and this should probably be the best reason to refactor, is
375 to refactor to \emph{facilitate a program change}. If one has managed to keep
376 one's code clean and tidy, and the code is not bloated with design patterns that
377 are not ever going to be needed, then some refactoring might be needed to
378 introduce a design pattern that is appropriate for the change that is going to
381 Refactoring program code --- with a goal in mind --- can give the code itself
382 more value. That is in the form of robustness to bugs, understandability and
383 maintainability. Having robust code is an obvious advantage, but
384 understandability and maintainability are both very important aspects of
385 software development. By incorporating refactoring in the development process,
386 bugs are found faster, new functionality is added more easily and code is easier
387 to understand by the next person exposed to it, which might as well be the
388 person who wrote it. The consequence of this, is that refactoring can increase
389 the average productivity of the development process, and thus also add to the
390 monetary value of a business in the long run. The perspective on productivity
391 and money should also be able to open the eyes of the many nearsighted managers
392 that seldom see beyond the next milestone.
394 \section{The magical number seven}\label{magic_number_seven}
395 The article \tit{The magical number seven, plus or minus two: some limits on our
396 capacity for processing information}\citing{miller1956} by George A. Miller,
397 was published in the journal \name{Psychological Review} in 1956. It presents
398 evidence that the number of objects a human being
399 can hold in its working memory is roughly seven, plus or minus two objects. This
400 number varies a bit depending on the nature and complexity of the objects, but
401 is according to Miller ``\ldots never changing so much as to be
404 Miller's article culminates in the section called \emph{Recoding}, a term he
405 borrows from communication theory. The central result in this section is that by
406 recoding information, the amount of information that a human can
407 process at a time is increased. By \emph{recoding}, Miller means to group
408 objects together in chunks, and give each chunk a new name that it can be
412 \ldots recoding is an extremely powerful weapon for increasing the amount of
413 information that we can deal with.~\cite[p.~95]{miller1956}
416 By organizing objects into patterns of ever growing depth, one can memorize and
417 process a much larger amount of data than if it were to be represented as its
418 basic pieces. This grouping and renaming is analogous to how many refactorings
419 work, by grouping pieces of code and giving them a new name. Examples are the
420 fundamental \ExtractMethod and \refa{Extract Class}
421 refactorings\citing{refactoring}.
423 An example from the article addresses the problem of memorizing a sequence of
424 binary digits. The example presented here is a slightly modified version of the
425 one presented in the original article\citing{miller1956}, but it preserves the
426 essence of it. Let us say we have the following sequence of
427 16 binary digits: ``1010001001110011''. Most of us will have a hard time
428 memorizing this sequence by only reading it once or twice. Imagine if we instead
429 translate it to this sequence: ``A273''. If you have a background from computer
430 science, it will be obvious that the latter sequence is the first sequence
431 recoded to be represented by digits in base 16. Most people should be able to
432 memorize this last sequence by only looking at it once.
434 Another result from the Miller article is that when the amount of information a
435 human must interpret increases, it is crucial that the translation from one code
436 to another must be almost automatic for the subject to be able to remember the
437 translation, before \heshe is presented with new information to recode. Thus
438 learning and understanding how to best organize certain kinds of data is
439 essential to efficiently handle that kind of data in the future. This is much
440 like when humans learn to read. First they must learn how to recognize letters.
441 Then they can learn distinct words, and later read sequences of words that form
442 whole sentences. Eventually, most of them will be able to read whole books and
443 briefly retell the important parts of its content. This suggests that the use of
444 design patterns is a good idea when reasoning about computer programs. With
445 extensive use of design patterns when creating complex program structures, one
446 does not always have to read whole classes of code to comprehend how they
447 function; it may be sufficient to only see the name of a class to almost fully
448 understand its responsibilities.
451 Our language is tremendously useful for repackaging material into a few chunks
452 rich in information.~\cite[p.~95]{miller1956}
455 Without further evidence, these results at least indicate that refactoring
456 source code into smaller units with higher cohesion and, when needed,
457 introducing appropriate design patterns, should aid in the cause of creating
458 computer programs that are easier to maintain and have code that is easier (and
461 \section{Notable contributions to the refactoring literature}
462 \todoin{Update with more contributions}
465 \item[1992] William F. Opdyke submits his doctoral dissertation called
466 \tit{Refactoring Object-Oriented Frameworks}\citing{opdyke1992}. This work
467 defines a set of refactorings, that are behavior preserving given that their
468 preconditions are met. The dissertation is focused on the automation of
470 \item[1999] Martin Fowler et al.: \tit{Refactoring: Improving the Design of
471 Existing Code}\citing{refactoring}. This is maybe the most influential text
472 on refactoring. It bears similarities to Opdyke's thesis\citing{opdyke1992}
473 in the way that it provides a catalog of refactorings. But Fowler's book is
474 more about the craft of refactoring, as he focuses on establishing a
475 vocabulary for refactoring, together with the mechanics of different
476 refactorings and when to perform them. His methodology is also founded on
477 the principles of test-driven development.
478 \item[2005] Joshua Kerievsky: \tit{Refactoring to
479 Patterns}\citing{kerievsky2005}. This book is heavily influenced by Fowler's
480 \tit{Refactoring}\citing{refactoring} and the ``Gang of Four'' \tit{Design
481 Patterns}\citing{designPatterns}. It is building on the refactoring
482 catalogue from Fowler's book, but is trying to bridge the gap between
483 \emph{refactoring} and \emph{design patterns} by providing a series of
484 higher-level composite refactorings, that make code evolve toward or away
485 from certain design patterns. The book is trying to build up the reader's
486 intuition around \emph{why} one would want to use a particular design
487 pattern, and not just \emph{how}. The book is encouraging evolutionary
488 design \see{relationToDesignPatterns}.
491 \section{Tool support (for Java)}\label{toolSupport}
492 This section will briefly compare the refactoring support of the three IDEs
493 \name{Eclipse}\footnote{\url{http://www.eclipse.org/}}, \name{IntelliJ
494 IDEA}\footnote{The IDE under comparison is the \name{Community Edition},
495 \url{http://www.jetbrains.com/idea/}} and
496 \name{NetBeans}\footnote{\url{https://netbeans.org/}}. These are the most
497 popular Java IDEs\citing{javaReport2011}.
499 All three IDEs provide support for the most useful refactorings, like the
500 different extract, move and rename refactorings. In fact, Java-targeted IDEs are
501 known for their good refactoring support, so this did not appear as a big
504 The IDEs seem to have excellent support for the \ExtractMethod refactoring, so
505 at least they have all passed the first ``refactoring
506 rubicon''\citing{fowlerRubicon2001,secondRubicon2012}.
508 Regarding the \gloss{moveMethod} refactoring, the \name{Eclipse} and
509 \name{IntelliJ} IDEs do the job in very similar manners. In most situations they
510 both do a satisfying job by producing the expected outcome. But they do nothing
511 to check that the result does not break the semantics of the program
513 The \name{NetBeans} IDE implements this refactoring in a somewhat
514 unsophisticated way. For starters, the refactoring's default destination for the
515 move, is the same class as the method already resides in, although it refuses to
516 perform the refactoring if chosen. But the worst part is, that if moving the
517 method \method{f} of the class \type{C} to the class \type{X}, it will break the
518 code. The result is shown in \myref{lst:moveMethod_NetBeans}.
522 \begin{minted}[samepage]{java}
535 \begin{minted}[samepage]{java}
545 \caption{Moving method \method{f} from \type{C} to \type{X}.}
546 \label{lst:moveMethod_NetBeans}
549 \name{NetBeans} will try to create code that calls the methods \method{m} and \method{n}
550 of \type{X} by accessing them through \var{c.x}, where \var{c} is a parameter of
551 type \type{C} that is added to the method \method{f} when it is moved. (This is
552 seldom the desired outcome of this refactoring, but ironically, this ``feature''
553 keeps \name{NetBeans} from breaking the code in the example from \myref{correctness}.)
554 If \var{c.x} for some reason is inaccessible to \type{X}, as in this case, the
555 refactoring breaks the code, and it will not compile. \name{NetBeans} presents a
556 preview of the refactoring outcome, but the preview does not catch it if the IDE
557 is about to break the program.
559 The IDEs under investigation seem to have fairly good support for primitive
560 refactorings, but what about more complex ones, such as
561 \gloss{extractClass}\citing{refactoring}? \name{IntelliJ} handles this in a
562 fairly good manner, although, in the case of private methods, it leaves unused
563 methods behind. These are methods that delegate to a field with the type of the
564 new class, but are not used anywhere. \name{Eclipse} has added its own quirk to
565 the \refa{Extract Class} refactoring, and only allows for \emph{fields} to be
566 moved to a new class, \emph{not methods}. This makes it effectively only
567 extracting a data structure, and calling it \refa{Extract Class} is a little
568 misleading. One would often be better off with textual extract and paste than
569 using the \refa{Extract Class} refactoring in \name{Eclipse}. When it comes to
570 \name{NetBeans}, it does not even attempt to provide this refactoring.
572 \todoin{Visual Studio (C++/C\#), Smalltalk refactoring browser?,
573 second refactoring rubicon?}
575 \section{The relation to design patterns}\label{relationToDesignPatterns}
577 Refactoring and design patterns have at least one thing in common: they are both
578 promoted by advocates of \emph{clean code}\citing{cleanCode} as fundamental
579 tools on the road to more maintainable and extendable source code.
582 Design patterns help you determine how to reorganize a design, and they can
583 reduce the amount of refactoring you need to do
584 later.~\cite[p.~353]{designPatterns}
587 Although sometimes associated with
588 over-engineering\citing{kerievsky2005,refactoring}, design patterns are in
589 general assumed to be good for maintainability of source code. That may be
590 because many of them are designed to support the \emph{open/closed principle} of
591 object-oriented programming. The principle was first formulated by Bertrand
592 Meyer, the creator of the Eiffel programming language, like this: ``Modules
593 should be both open and closed.''\citing{meyer1988} It has been popularized,
594 with this as a common version:
597 Software entities (classes, modules, functions, etc.) should be open for
598 extension, but closed for modification.\footnote{See
599 \url{http://c2.com/cgi/wiki?OpenClosedPrinciple} or
600 \url{https://en.wikipedia.org/wiki/Open/closed_principle}}
603 Maintainability is often thought of as the ability to be able to introduce new
604 functionality without having to change too much of the old code. When
605 refactoring, the motivation is often to facilitate adding new functionality. It
606 is about factoring the old code in a way that makes the new functionality
607 able to benefit from the functionality already residing in a software system,
608 without having to copy old code into new. Then, next time someone shall add new
609 functionality, it is less likely that the old code has to change. Assuming that
610 a design pattern is the best way to get rid of duplication and assist in
611 implementing new functionality, it is reasonable to conclude that a design
612 pattern often is the target of a series of refactorings. Having a repertoire of
613 design patterns can also help in knowing when and how to refactor a program to
614 make it reflect certain desired characteristics.
617 There is a natural relation between patterns and refactorings. Patterns are
618 where you want to be; refactorings are ways to get there from somewhere
619 else.~\cite[p.~107]{refactoring}
622 This quote is wise in many contexts, but it is not always appropriate to say
623 ``Patterns are where you want to be\ldots''. \emph{Sometimes}, patterns are
624 where you want to be, but only because it will benefit your design. It is not
625 true that one should always try to incorporate as many design patterns as
626 possible into a program. It is not like they have intrinsic value. They only add
627 value to a system when they support its design. Otherwise, the use of design
628 patterns may only lead to a program that is more complex than necessary.
631 The overuse of patterns tends to result from being patterns happy. We are
632 \emph{patterns happy} when we become so enamored of patterns that we simply
633 must use them in our code.~\cite[p.~24]{kerievsky2005}
636 This can easily happen when relying largely on up-front design. Then it is
637 natural, in the very beginning, to try to build in all the flexibility that one
638 believes will be necessary throughout the lifetime of a software system.
639 According to Joshua Kerievsky ``That sounds reasonable --- if you happen to be
640 psychic.''~\cite[p.~1]{kerievsky2005} He is advocating what he believes is a
641 better approach: To let software continually evolve. To start with a simple
642 design that meets today's needs, and tackle future needs by refactoring to
643 satisfy them. He believes that this is a more economic approach than investing
644 time and money into a design that inevitably is going to change. By relying on
645 continuously refactoring a system, its design can be made simpler without
646 sacrificing flexibility. To be able to fully rely on this approach, it is of
647 utter importance to have a reliable suite of tests to lean on \see{testing}. This
648 makes the design process more natural and less characterized by difficult
649 decisions that have to be made before proceeding in the process, and that are
650 going to define a project for all of its unforeseeable future.
654 \section{Classification of refactorings}
655 % only interesting refactorings
656 % with 2 detailed examples? One for structured and one for intra-method?
657 % Is replacing Bubblesort with Quick Sort considered a refactoring?
659 \subsection{Structural refactorings}
661 \subsubsection{Primitive refactorings}
664 \explanation{Extract Method}{You have a code fragment that can be grouped
665 together.}{Turn the fragment into a method whose name explains the purpose of
668 \explanation{Inline Method}{A method's body is just as clear as its name.}{Put
669 the method's body into the body of its callers and remove the method.}
671 \explanation{Inline Temp}{You have a temp that is assigned to once with a simple
672 expression, and the temp is getting in the way of other refactorings.}{Replace
673 all references to that temp with the expression}
675 % Moving Features Between Objects
676 \explanation{Move Method}{A method is, or will be, using or used by more
677 features of another class than the class on which it is defined.}{Create a new
678 method with a similar body in the class it uses most. Either turn the old method
679 into a simple delegation, or remove it altogether.}
681 \explanation{Move Field}{A field is, or will be, used by another class more than
682 the class on which it is defined}{Create a new field in the target class, and
683 change all its users.}
686 \explanation{Replace Magic Number with Symbolic Constant}{You have a literal
687 number with a particular meaning.}{Create a constant, name it after the meaning,
688 and replace the number with it.}
690 \explanation{Encapsulate Field}{There is a public field.}{Make it private and
693 \explanation{Replace Type Code with Class}{A class has a numeric type code that
694 does not affect its behavior.}{Replace the number with a new class.}
696 \explanation{Replace Type Code with Subclasses}{You have an immutable type code
697 that affects the behavior of a class.}{Replace the type code with subclasses.}
699 \explanation{Replace Type Code with State/Strategy}{You have a type code that
700 affects the behavior of a class, but you cannot use subclassing.}{Replace the
701 type code with a state object.}
703 % Simplifying Conditional Expressions
704 \explanation{Consolidate Duplicate Conditional Fragments}{The same fragment of
705 code is in all branches of a conditional expression.}{Move it outside of the
708 \explanation{Remove Control Flag}{You have a variable that is acting as a
709 control flag for a series of boolean expressions.}{Use a break or return
712 \explanation{Replace Nested Conditional with Guard Clauses}{A method has
713 conditional behavior that does not make clear the normal path of
714 execution.}{Use guard clauses for all special cases.}
716 \explanation{Introduce Null Object}{You have repeated checks for a null
717 value.}{Replace the null value with a null object.}
719 \explanation{Introduce Assertion}{A section of code assumes something about the
720 state of the program.}{Make the assumption explicit with an assertion.}
722 % Making Method Calls Simpler
723 \explanation{Rename Method}{The name of a method does not reveal its
724 purpose.}{Change the name of the method}
726 \explanation{Add Parameter}{A method needs more information from its
727 caller.}{Add a parameter for an object that can pass on this information.}
729 \explanation{Remove Parameter}{A parameter is no longer used by the method
732 %\explanation{Parameterize Method}{Several methods do similar things but with
733 %different values contained in the method.}{Create one method that uses a
734 %parameter for the different values.}
736 \explanation{Preserve Whole Object}{You are getting several values from an
737 object and passing these values as parameters in a method call.}{Send the whole
740 \explanation{Remove Setting Method}{A field should be set at creation time and
741 never altered.}{Remove any setting method for that field.}
743 \explanation{Hide Method}{A method is not used by any other class.}{Make the
746 \explanation{Replace Constructor with Factory Method}{You want to do more than
747 simple construction when you create an object}{Replace the constructor with a
750 % Dealing with Generalization
751 \explanation{Pull Up Field}{Two subclasses have the same field.}{Move the field
754 \explanation{Pull Up Method}{You have methods with identical results on
755 subclasses.}{Move them to the superclass.}
757 \explanation{Push Down Method}{Behavior on a superclass is relevant only for
758 some of its subclasses.}{Move it to those subclasses.}
760 \explanation{Push Down Field}{A field is used only by some subclasses.}{Move the
761 field to those subclasses}
763 \explanation{Extract Interface}{Several clients use the same subset of a class's
764 interface, or two classes have part of their interfaces in common.}{Extract the
765 subset into an interface.}
767 \explanation{Replace Inheritance with Delegation}{A subclass uses only part of a
768 superclass's interface or does not want to inherit data.}{Create a field for the
769 superclass, adjust methods to delegate to the superclass, and remove the
772 \explanation{Replace Delegation with Inheritance}{You're using delegation and
773 are often writing many simple delegations for the entire interface}{Make the
774 delegating class a subclass of the delegate.}
776 \subsubsection{Composite refactorings}
779 % \explanation{Replace Method with Method Object}{}{}
781 % Moving Features Between Objects
782 \explanation{Extract Class}{You have one class doing work that should be done by
783 two}{Create a new class and move the relevant fields and methods from the old
784 class into the new class.}
786 \explanation{Inline Class}{A class isn't doing very much.}{Move all its features
787 into another class and delete it.}
789 \explanation{Hide Delegate}{A client is calling a delegate class of an
790 object.}{Create Methods on the server to hide the delegate.}
792 \explanation{Remove Middle Man}{A class is doing too much simple delegation.}{Get
793 the client to call the delegate directly.}
796 \explanation{Replace Data Value with Object}{You have a data item that needs
797 additional data or behavior.}{Turn the data item into an object.}
799 \explanation{Change Value to Reference}{You have a class with many equal
800 instances that you want to replace with a single object.}{Turn the object into a
803 \explanation{Encapsulate Collection}{A method returns a collection}{Make it
804 return a read-only view and provide add/remove methods.}
806 % \explanation{Replace Array with Object}{}{}
808 \explanation{Replace Subclass with Fields}{You have subclasses that vary only in
809 methods that return constant data.}{Change the methods to superclass fields and
810 eliminate the subclasses.}
812 % Simplifying Conditional Expressions
813 \explanation{Decompose Conditional}{You have a complicated conditional
814 (if-then-else) statement.}{Extract methods from the condition, then part, and
817 \explanation{Consolidate Conditional Expression}{You have a sequence of
818 conditional tests with the same result.}{Combine them into a single conditional
819 expression and extract it.}
821 \explanation{Replace Conditional with Polymorphism}{You have a conditional that
822 chooses different behavior depending on the type of an object.}{Move each leg
823 of the conditional to an overriding method in a subclass. Make the original
826 % Making Method Calls Simpler
827 \explanation{Replace Parameter with Method}{An object invokes a method, then
828 passes the result as a parameter for a method. The receiver can also invoke this
829 method.}{Remove the parameter and let the receiver invoke the method.}
831 \explanation{Introduce Parameter Object}{You have a group of parameters that
832 naturally go together.}{Replace them with an object.}
834 % Dealing with Generalization
835 \explanation{Extract Subclass}{A class has features that are used only in some
836 instances.}{Create a subclass for that subset of features.}
838 \explanation{Extract Superclass}{You have two classes with similar
839 features.}{Create a superclass and move the common features to the
842 \explanation{Collapse Hierarchy}{A superclass and subclass are not very
843 different.}{Merge them together.}
845 \explanation{Form Template Method}{You have two methods in subclasses that
846 perform similar steps in the same order, yet the steps are different.}{Get the
847 steps into methods with the same signature, so that the original methods become
848 the same. Then you can pull them up.}
851 \subsection{Functional refactorings}
853 \explanation{Substitute Algorithm}{You want to replace an algorithm with one
854 that is clearer.}{Replace the body of the method with the new algorithm.}
858 \section{The impact on software quality}
860 \subsection{What is software quality?}
861 The term \emph{software quality} has many meanings. It all depends on the
862 context we put it in. If we look at it with the eyes of a software developer, it
863 usually means that the software is easily maintainable and testable, or in other
864 words, that it is \emph{well designed}. This often correlates with the
865 management scale, where \emph{keeping the schedule} and \emph{customer
866 satisfaction} are at the center. From the customer's point of view, in addition to
867 good usability, \emph{performance} and \emph{lack of bugs} are always
868 appreciated, measurements that are also shared by the software developer. (In
869 addition, such things as good documentation could be measured, but this is out
870 of the scope of this document.)
872 \subsection{The impact on performance}
874 Refactoring certainly will make software go more slowly\footnote{With today's
875 compiler optimization techniques and performance tuning of e.g.\ the Java
876 virtual machine, the penalties of object creation and method calls are
877 debatable.}, but it also makes the software more amenable to performance
878 tuning.~\cite[p.~69]{refactoring}
881 \noindent There is a common belief that refactoring compromises performance, due
882 to an increased degree of indirection and that polymorphism is slower than
885 In a survey, Demeyer\citing{demeyer2002} disproves this view in the case of
886 polymorphism. He did an experiment on what he calls ``Transform Self Type
887 Checks'', where you introduce a new polymorphic method and a new class hierarchy
888 to get rid of a class' type checking of a ``type attribute''. He uses this kind
889 of transformation to represent other ways of replacing conditionals with
890 polymorphism as well. The experiment is performed on the C++ programming
891 language and with three different compilers and platforms. Demeyer concludes
892 that, with compiler optimization turned on, polymorphism beats middle to large
893 sized if-statements and does as well as case-statements. (In accordance with
894 his hypothesis, due to similarities between the way C++ handles polymorphism and
898 The interesting thing about performance is that if you analyze most programs,
899 you find that they waste most of their time in a small fraction of the
900 code.~\cite[p.~70]{refactoring}
903 \noindent So, although an increased amount of method calls could potentially
904 slow down programs, one should avoid premature optimization and sacrificing good
905 design, leaving the performance tuning until after \gloss{profiling} the
906 software and having isolated the actual problem areas.
908 \section{Composite refactorings}\label{compositeRefactorings}
909 \todo{motivation, examples, manual vs automated?, what about refactoring in a
910 very large code base?}
911 Generally, when thinking about refactoring, at the mechanical level, there are
912 essentially two kinds of refactorings. There are the \emph{primitive}
913 refactorings, and the \emph{composite} refactorings.
915 \definition{A \emph{primitive refactoring} is a refactoring that cannot be
916 expressed in terms of other refactorings.}
918 \noindent Examples are the \refa{Pull Up Field} and \refa{Pull Up
919 Method} refactorings\citing{refactoring}, that move members up in their class
922 \definition{A \emph{composite refactoring} is a refactoring that can be
923 expressed in terms of two or more other refactorings.}
925 \noindent An example of a composite refactoring is the \refa{Extract
926 Superclass} refactoring\citing{refactoring}. In its simplest form, it is composed
927 of the previously described primitive refactorings, in addition to the
928 \refa{Pull Up Constructor Body} refactoring\citing{refactoring}. It works
929 by creating an abstract superclass that the target class(es) inherits from, then
930 by applying \refa{Pull Up Field}, \refa{Pull Up Method} and
931 \refa{Pull Up Constructor Body} on the members that are to be members of
932 the new superclass. If there are multiple classes in play, their interfaces may
933 need to be united with the help of some rename refactorings, before extracting
934 the superclass. For an overview of the \refa{Extract Superclass}
935 refactoring, see \myref{fig:extractSuperclass}.
939 \includegraphics[angle=270,width=\linewidth]{extractSuperclassItalic.pdf}
940 \caption{The Extract Superclass refactoring, with united interfaces.}
941 \label{fig:extractSuperclass}
944 \section{Manual vs. automated refactorings}
945 Refactoring is something every programmer does, even if \heshe does not know
946 the term \emph{refactoring}. Every refinement of source code that does not alter
947 the program's behavior is a refactoring. For small refactorings, such as
948 \ExtractMethod, executing it manually is a manageable task, but is still prone
949 to errors. Getting it right the first time is not easy, considering the method
950 signature and all the other aspects of the refactoring that have to be in place.
952 Consider the renaming of classes, methods and fields. For complex programs these
953 refactorings are almost impossible to get right. Attacking them with textual
954 search and replace, or even regular expressions, will fall short on these tasks.
955 Then it is crucial to have proper tool support that can perform them
956 automatically. Tools that can parse source code and thus have semantic knowledge
957 about which occurrences of which names belong to what construct in the program.
958 For even trying to perform one of these complex tasks manually, one would have to
959 be very confident in the existing test suite \see{testing}.
961 \section{Correctness of refactorings}\label{correctness}
962 For automated refactorings to be truly useful, they must show a high degree of
963 behavior preservation. This last sentence might seem obvious, but there are
964 examples of refactorings in existing tools that break programs. In an ideal
965 world, every automated refactoring would be ``complete'', in the sense that it
966 would never break a program. In an ideal world, every program would also be free
967 from bugs. In modern IDEs the implemented automated refactorings are working for
968 \emph{most} cases, which is enough for making them useful.
970 I will now present an example of a \emph{corner case} where a program breaks
971 when a refactoring is applied. The example shows an \ExtractMethod refactoring
972 followed by a \MoveMethod refactoring that breaks a program in both the
973 \name{Eclipse} and \name{IntelliJ} IDEs\footnote{The \name{NetBeans} IDE handles this
974 particular situation without altering the program's behavior, mainly because
975 its \refa{Move Method} refactoring implementation is a bit flawed in other ways
976 \see{toolSupport}.}. The target and the destination for the composed
977 refactoring is shown in \myref{lst:correctnessExtractAndMove}. Note that the
978 method \method{m(C c)} of class \type{X} assigns to the field \var{x} of the
979 argument \var{c} that has type \type{C}.
983 \begin{minted}[linenos]{java}
984 // Refactoring target
986 public X x = new X();
998 \begin{minted}[]{java}
999 // Method destination
1001 public void m(C c) {
1003 // If m is called from
1004 // c, then c.x no longer
1011 \caption{The target and the destination for the composition of the
1012 \refa{Extract Method} and \refa{Move Method} refactorings.}
1013 \label{lst:correctnessExtractAndMove}
1017 The refactoring sequence works by extracting lines 6 through 8 from the original
1018 class \type{C} into a method \method{f} with the statements from those lines as
1019 its method body (but with the comment left out, since it will no longer hold any
1020 meaning). The method is then moved to the class \type{X}. The result is shown
1021 in \myref{lst:correctnessExtractAndMoveResult}.
1023 Before the refactoring, the methods \method{m} and \method{n} of class \type{X}
1024 are called on different object instances (see lines 6 and 8 of the original class
1025 \type{C} in \cref{lst:correctnessExtractAndMove}). After the refactoring, they
1026 are called on the same object, and the statement on line
1027 3 of class \type{X} (in \cref{lst:correctnessExtractAndMoveResult}) no longer
1028 has the desired effect in our example. The method \method{f} of class \type{C}
1029 is now calling the method \method{f} of class \type{X} (see line 5 of class
1030 \type{C} in \cref{lst:correctnessExtractAndMoveResult}), and the program now
1031 behaves differently than before.
1034 \begin{multicols}{2}
1035 \begin{minted}[linenos]{java}
1037 public X x = new X();
1047 \begin{minted}[linenos]{java}
1049 public void m(C c) {
1055 public void f(C c) {
1062 \caption{The result of the composed refactoring.}
1063 \label{lst:correctnessExtractAndMoveResult}
1066 The bug introduced in the previous example is of such a nature\footnote{Caused
1067 by aliasing. See \url{https://en.wikipedia.org/wiki/Aliasing_(computing)}}
1068 that it is very difficult to spot if the refactored code is not covered by
1069 tests. It does not generate compilation errors, and will thus only result in
1070 a runtime error or corrupted data, which might be hard to detect.
1072 \section{Refactoring and the importance of testing}\label{testing}
1074 If you want to refactor, the essential precondition is having solid
1075 tests.\citing{refactoring}
1078 When refactoring, there are roughly three classes of errors that can be made.
1079 The first class of errors are the ones that make the code unable to compile.
1080 These \emph{compile-time} errors are of the nicer kind. They flash up at the
1081 moment they are made (at least when using an IDE), and are usually easy to fix.
1082 The second class are the \emph{runtime} errors. Although they take a bit longer
1083 to surface, they usually manifest after some time in an illegal argument
1084 exception, null pointer exception or similar during the program execution.
1085 These kinds of errors are a bit harder to handle, but at least they will show,
1086 eventually. Then there are the \emph{behavior-changing} errors. These errors are
1087 of the worst kind. They do not show up during compilation and they do not turn
1088 on a blinking red light during runtime either. The program can seem to work
1089 perfectly fine with them in play, but the business logic can be damaged in ways
1090 that will only show up over time.
1092 For discovering runtime errors and behavior changes when refactoring, it is
1093 essential to have good test coverage. Testing in this context means writing
1094 automated tests. Manual testing may have its uses, but when refactoring, it is
1095 automated unit testing that dominates. For discovering behavior changes it is
1096 especially important to have tests that cover potential problems, since these
1097 kinds of errors do not reveal themselves.
1099 Unit testing is not a way to \emph{prove} that a program is correct, but it is a
1100 way to make you confident that it \emph{probably} works as desired. In the
1101 context of test driven development (commonly known as TDD), the tests are even a
1102 way to define how the program is \emph{supposed} to work. It is then, by
1103 definition, working if the tests are passing.
1105 If the test coverage for a code base is perfect, then it should, theoretically,
1106 be risk-free to perform refactorings on it. This is why automated tests and
1107 refactoring are such a great match.
1109 \subsection{Testing the code from the correctness section}
1110 The worst thing that can happen when refactoring is to introduce changes to the
1111 behavior of a program, as in the example in \myref{correctness}. This example
1112 may be trivial, but the essence is clear. The only problem with the example is
1113 that it is not clear how to create automated tests for it, without changing it
1116 Unit tests, as they are known from the different \glosspl{xUnit} around, are
1117 only suitable to test the \emph{result} of isolated operations. They cannot
1118 easily (if at all) observe the \emph{history} of a program.
1120 This problem is still open.
1125 Assuming a sequential (non-concurrent) program:
1127 \begin{minted}{java}
1128 tracematch (C c, X x) {
1130 call(* X.m(C)) && args(c) && cflow(within(C));
1132 call(* X.n()) && target(x) && cflow(within(C));
1134 set(C.x) && target(c) && !cflow(m);
1138 { assert x == c.x; }
1142 %\begin{minted}{java}
1143 %tracematch (X x1, X x2) {
1145 % call(* X.m(C)) && target(x1);
1147 % call(* X.n()) && target(x2);
1149 % set(C.x) && !cflow(m) && !cflow(n);
1153 % { assert x1 != x2; }
1158 \section{The project}
1159 The aim of this master project will be to investigate the relationship between a
1160 composite refactoring composed of the \ExtractMethod and \MoveMethod
1161 refactorings, and its impact on one or more software metrics.
1163 The composition of the \ExtractMethod and \MoveMethod refactorings springs
1164 naturally out of the need to move procedures closer to the data they manipulate.
1165 This composed refactoring is not well described in the literature, but it is
1166 implemented in at least one tool called
1167 \name{CodeRush}\footnote{\url{https://help.devexpress.com/\#CodeRush/CustomDocument3519}},
1168 which is an extension for \name{MS Visual
1169 Studio}\footnote{\url{http://www.visualstudio.com/}}. In CodeRush it is called
1170 \refa{Extract Method to
1171 Type}\footnote{\url{https://help.devexpress.com/\#CodeRush/CustomDocument6710}},
1172 but I choose to call it \ExtractAndMoveMethod, since I feel it better
1173 communicates which primitive refactorings it is composed of.
1175 For the metrics, I will at least measure the \metr{Coupling between object
1176 classes} (CBO) metric that is described by Chidamber and Kemerer in their
1177 article \tit{A Metrics Suite for Object Oriented
1178 Design}\citing{metricsSuite1994}.
1180 The project will then consist in implementing the \ExtractAndMoveMethod
1181 refactoring, as well as executing it over a larger code base. Then the effect of
1182 the change must be measured by calculating the chosen software metrics both
1183 before and after the execution. To be able to execute the refactoring
1184 automatically I have to make it analyze code to determine the best selections to
1185 extract into new methods.