essay/master-essay-erlenkr.tex

   1 \documentclass[USenglish,11pt]{ifimaster}
   2 \usepackage{import}
   3 \usepackage[utf8]{inputenc}
   4 \usepackage[T1]{fontenc,url}
   5 \usepackage{lmodern} % using Latin Modern to be able to use bold typewriter font
   6 %\usepackage{mathpazo}
   7 \urlstyle{sf}
   8 \usepackage{listings}
   9 \usepackage{tabularx}
  10 \usepackage{tikz}
  11 \usepackage{tikz-qtree}
  12 \usetikzlibrary{shapes,snakes,trees,arrows,shadows,positioning,calc}
  13 \usepackage{babel,textcomp,csquotes,ifikompendiumforside}
  14
  15 \usepackage{varioref}
  16 \usepackage[hidelinks]{hyperref}
  17 \usepackage{cleveref}
  18 \usepackage[xindy,entrycounter]{glossaries}
  19
  20 \usepackage[style=alphabetic,backend=biber]{biblatex}
  21 \usepackage{amsthm}
  22 \usepackage{mathtools}
  23 \usepackage{graphicx}
  24 % use 'disable' before printing:
  25 \usepackage[disable]{todonotes}
  26 \usepackage{xspace}
  27 \usepackage{he-she}
  28 \usepackage{verbatim}
  29 \usepackage{minted}
  30 \usepackage{multicol}
  31 \usemintedstyle{bw}
  32 \usepackage{perpage} %the perpage package
  33 \MakePerPage{footnote} %the perpage package command
  34
  35 \theoremstyle{definition} % amsthm style applied to the environments declared below
  36 \newtheorem*{wordDef}{Definition} % unnumbered environment headed "Definition"; used by \definition
  37 \newtheorem*{theorem}{Theorem} % unnumbered environment headed "Theorem"
  38
  39 \graphicspath{ {./figures/} } % search path for included graphics
  40
  41 \newcommand{\citing}[1]{~\cite{#1}} % citation preceded by a non-breaking space
  42 %\newcommand{\myref}[1]{\cref{#1} on \cpageref{#1}}
  43 \newcommand{\myref}[1]{\vref{#1}} % cross-reference through \vref (replaces the variant commented out above)
  44
  45 \newcommand{\glossref}[1]{\textsuperscript{(\glsrefentry{#1})}} % superscripted, parenthesized \glsrefentry for entry #1
  46 %\newcommand{\gloss}[1]{\gls{#1}\glossref{#1}}
  47 %\newcommand{\glosspl}[1]{\glspl{#1}\glossref{#1}}
  48 \newcommand{\gloss}[1]{\gls{#1}} % glossary term (singular); the commented-out variant also appended \glossref
  49 \newcommand{\glosspl}[1]{\glspl{#1}} % glossary term (plural); the commented-out variant also appended \glossref
  50
  51 \newcommand{\definition}[1]{\begin{wordDef}#1\end{wordDef}} % wraps #1 in the wordDef environment
  52 \newcommand{\see}[1]{(see \myref{#1})} % parenthesized "see" reference built on \myref
  53 \newcommand{\explanation}[3]{\noindent\textbf{\textit{#1}}\\*\emph{When:}
  54 #2\\*\emph{How:} #3\\*[-7px]} % \explanation: #1 = bold italic heading, #2 = text after "When:", #3 = text after "How:"
  55
  56 %\newcommand{\type}[1]{\lstinline{#1}}
  57 \newcommand{\code}[1]{\texttt{\textbf{#1}}} % bold typewriter text
  58 \newcommand{\type}[1]{\code{#1}} % type names are typeset like \code
  59 \newcommand{\typeref}[1]{\footnote{\type{#1}}} % footnote containing #1 typeset with \type
  60 \newcommand{\typewithref}[2]{\type{#2}\typeref{#1.#2}} % typesets #2 and footnotes it as "#1.#2"
  61 \newcommand{\method}[1]{\type{#1}} % method names are typeset like \type
  62 \newcommand{\methodref}[2]{\footnote{\type{#1}\method{\##2()}}} % footnote reading <#1>#<#2>()
  63 \newcommand{\methodwithref}[2]{\method{#2}\footnote{\type{#1}\method{\##2()}}} % typesets #2 and adds the same footnote text as \methodref
  64 \newcommand{\var}[1]{\type{#1}} % variable names are typeset like \type
  65
  66 \newcommand{\name}[1]{#1} % semantic marker for proper names; currently leaves the text unchanged
  67 \newcommand{\tit}[1]{\emph{#1}} % emphasized; used in the text for titles of works
  68 \newcommand{\refa}[1]{\emph{#1}} % emphasized; used for refactoring names
  69 \newcommand{\pattern}[1]{\emph{#1}} % emphasized; used for design pattern names
  70 \newcommand{\metr}[1]{\emph{#1}} % emphasized; NOTE(review): presumably for metric names, not used in the visible text
  71 \newcommand{\ExtractMethod}{\refa{Extract Method}\xspace} % shorthand for the refactoring name, followed by \xspace
  72 \newcommand{\MoveMethod}{\refa{Move Method}\xspace} % shorthand for the refactoring name, followed by \xspace
  73 \newcommand{\ExtractAndMoveMethod}{\refa{Extract and Move Method}\xspace} % shorthand for the refactoring name, followed by \xspace
  74
  75 \newcommand\todoin[2][]{\todo[inline, caption={#2}, #1]{
  76 \begin{minipage}{\textwidth-4pt}#2\end{minipage}}} % \todoin: inline \todo note; #2 is both caption and minipage body, optional #1 passes extra \todo options
  77
  78 \title{Refactoring}
  79 \subtitle{An essay}
  80 \author{Erlend Kristiansen}
  81 \date{2014}
  82
  83 \makeglossaries
  84 \newglossaryentry{profiling}
  85 {
  86   name=profiling,
  87   description={is to run a computer program through a profiler/with a profiler
  88   attached. A profiler is a program for analyzing performance within an
  89 application.  It is used to analyze memory consumption, processing time and
  90 frequency of procedure calls and such},
  91   %see={profiler}
  92 }
  93 \newglossaryentry{profiler}
  94 {
  95   name=profiler,
  96   description={A profiler is a program for analyzing performance within an
  97   application. It is used to analyze memory consumption, processing time and
  98 frequency of procedure calls and such.}
  99 }
 100 \newglossaryentry{xUnit}
 101 {
 102   name={xUnit framework},
 103   description={An xUnit framework is a framework for writing unit tests for a
 104     computer program. It follows the patterns known from the JUnit framework for
 105     Java\citing{fowlerXunit}
 106   },
 107   plural={xUnit frameworks}
 108 }
 109 \newglossaryentry{softwareObfuscation}
 110 {
 111   name={software obfuscation},
 112   description={makes source code harder to read and analyze, while preserving
 113   its semantics}
 114 }
 115 \newglossaryentry{extractClass}
 116 {
 117   name=\refa{Extract Class},
 118   description={The \refa{Extract Class} refactoring works by creating a class,
 119 then moving members from another class to that class and accessing them from
 120 the old class via a reference to the new class}
 121 }
 122 \newglossaryentry{designPattern}
 123 {
 124   name={design pattern},
 125   description={A design pattern is a named abstraction, that is meant to solve a
 126   general design problem.  It describes the key aspects of a common problem and
 127 identifies its participators and how they collaborate},
 128   plural={design patterns}
 129 }
 130 \newglossaryentry{extractMethod}
 131 {
 132   name=\refa{Extract Method},
 133   description={The \refa{Extract Method} refactoring is used to extract a
 134 fragment of code from its context and into a new method. A call to the new
 135 method is inserted where the fragment was before. It is used to break code into
 136 logical units, with names that explain their purpose}
 137 }
 138 \newglossaryentry{moveMethod}
 139 {
 140   name=\refa{Move Method},
 141   description={The \refa{Move Method} refactoring is used to move a method from
 142   one class to another. This is useful if the method is using more features of
 143   another class than of the class in which it is currently defined. Then all calls
 144   to this method must be updated, or the method must be copied, with the old
 145 method delegating to the new method}
 146 }
 147
 148 \bibliography{bibliography/master-thesis-erlenkr-bibliography}
 149
 150 % UML comment in TikZ:
 151 % ref: https://tex.stackexchange.com/questions/103688/folded-paper-shape-tikz
 152 \makeatletter
 153 \pgfdeclareshape{umlcomment}{% rectangle whose upper right corner is cut off and drawn as a folded flap
 154   \inheritsavedanchors[from=rectangle] % this is nearly a rectangle
 155   \inheritanchorborder[from=rectangle]
 156   \inheritanchor[from=rectangle]{center}
 157   \inheritanchor[from=rectangle]{north}
 158   \inheritanchor[from=rectangle]{south}
 159   \inheritanchor[from=rectangle]{west}
 160   \inheritanchor[from=rectangle]{east}
 161   % ... and possibly more
 162   \backgroundpath{% this is new
 163   % store lower left (south west) in xa/ya and upper right (north east) in xb/yb
 164   \southwest \pgf@xa=\pgf@x \pgf@ya=\pgf@y
 165   \northeast \pgf@xb=\pgf@x \pgf@yb=\pgf@y
 166   % compute corner of "flipped page": 10pt left of and 10pt below the upper right corner
 167   \pgf@xc=\pgf@xb \advance\pgf@xc by-10pt % this should be a parameter
 168   \pgf@yc=\pgf@yb \advance\pgf@yc by-10pt
 169   % construct main path
 170   \pgfpathmoveto{\pgfpoint{\pgf@xa}{\pgf@ya}}
 171   \pgfpathlineto{\pgfpoint{\pgf@xa}{\pgf@yb}}
 172   \pgfpathlineto{\pgfpoint{\pgf@xc}{\pgf@yb}}
 173   \pgfpathlineto{\pgfpoint{\pgf@xb}{\pgf@yc}}
 174   \pgfpathlineto{\pgfpoint{\pgf@xb}{\pgf@ya}}
 175   \pgfpathclose
 176   % add little corner
 177   \pgfpathmoveto{\pgfpoint{\pgf@xc}{\pgf@yb}}
 178   \pgfpathlineto{\pgfpoint{\pgf@xc}{\pgf@yc}}
 179   \pgfpathlineto{\pgfpoint{\pgf@xb}{\pgf@yc}}
 180   \pgfpathlineto{\pgfpoint{\pgf@xc}{\pgf@yc}} % NOTE(review): retraces the previous segment back to the inner corner
 181   }
 182 }
 183 \makeatother
 184
 185 \tikzstyle{comment}=[% node style using the umlcomment shape, with shadow and typewriter font
 186   draw,
 187   drop shadow,
 188   fill=white,
 189   align=center,
 190   shape=document, % NOTE(review): superseded by shape=umlcomment further down in this list
 191   minimum width=20mm,
 192   minimum height=10mm,
 193   shape=umlcomment, % the shape that takes effect; declared with \pgfdeclareshape earlier in the preamble
 194   inner sep=2ex,
 195   font=\ttfamily,
 196 ]
 197
 198 %\interfootnotelinepenalty=10000
 199 \renewcommand{\thesection}{\arabic{section}}
 200
 201 \begin{document}
 202 \pagenumbering{roman}
 203 \ififorside
 204 \frontmatter{}
 205
 206 \mainmatter
 207
 208 \chapter*{What is Refactoring?}
 209
 210 This question is best answered by first defining the concept of a
 211 \emph{refactoring}, what it is to \emph{refactor}, and then discussing what aspects
 212 of programming make people want to refactor their code.
 213
 214 \section{Defining refactoring}
 215 Martin Fowler, in his classic book on refactoring\citing{refactoring}, defines a
 216 refactoring like this:
 217
 218 \begin{quote}
 219   \emph{Refactoring} (noun): a change made to the internal
 220   structure\footnote{The structure observable by the programmer.} of software to
 221   make it easier to understand and cheaper to modify without changing its
 222   observable behavior.~\cite[p.~53]{refactoring}
 223 \end{quote}
 224
 225 \noindent This definition assigns additional meaning to the word
 226 \emph{refactoring}, beyond the composition of the prefix \emph{re-}, usually
 227 meaning something like ``again'' or ``anew'', and the word \emph{factoring},
 228 that can mean to isolate the \emph{factors} of something. Here a \emph{factor}
 229 would be close to the mathematical definition of something that divides a
 230 quantity, without leaving a remainder. Fowler is mixing the \emph{motivation}
 231 behind refactoring into his definition. Instead it could be more refined, formed
 232 to only consider the \emph{mechanical} and \emph{behavioral} aspects of
 233 refactoring.  That is to factor the program again, putting it together in a
 234 different way than before, while preserving the behavior of the program. An
 235 alternative definition could then be:
 236
 237 \definition{A \emph{refactoring} is a transformation
 238 done to a program without altering its external behavior.}
 239
 240 From this we can conclude that a refactoring primarily changes how the
 241 \emph{code} of a program is perceived by the \emph{programmer}, and not the
 242 \emph{behavior} experienced by any user of the program. Although the logical
 243 meaning is preserved, such changes could potentially alter the program's
 244 behavior when it comes to performance gains or penalties. So any logic depending
 245 on the performance of a program could make the program behave differently after
 246 a refactoring.
 247
 248 In the extreme case one could argue that \gloss{softwareObfuscation} is
 249 refactoring. It is often used to protect proprietary software. It restrains
 250 uninvited viewers, so they have a hard time analyzing code whose inner
 251 workings they are not supposed to know. This could be a problem when using a language that
 252 is possible to decompile, such as Java.
 253
 254 Obfuscation could be done by composing many, more or less randomly chosen,
 255 refactorings. Then the question arises whether it can be called a
 256 \emph{composite refactoring} or not \see{compositeRefactorings}?  The answer is
 257 not obvious.  First, there is no way to describe the mechanics of software
 258 obfuscation, because there are infinitely many ways to do that. Second,
 259 obfuscation can be thought of as \emph{one operation}: Either the code is
 260 obfuscated, or it is not. Third, it makes no sense to call software obfuscation
 261 \emph{a refactoring}, since it holds different meaning to different people.
 262
 263 This last point is important, since one of the motivations behind defining
 264 different refactorings, is to establish a \emph{vocabulary} for software
 265 professionals to use when reasoning about and discussing programs, similar to
 266 the motivation behind \glosspl{designPattern}\citing{designPatterns}.
 267 \begin{comment}
 268 So for describing \emph{software obfuscation}, it might be more appropriate to
 269 define what you do when performing it rather than precisely defining its
 270 mechanics in terms of other refactorings.
 271 \end{comment}
 272
 273 \section{The etymology of `refactoring'}
 274 It is a little difficult to pinpoint the exact origin of the word
 275 ``refactoring'', as it seems to have evolved as part of a colloquial
 276 terminology, more than a scientific term. There is no authoritative source for a
 277 formal definition of it.
 278
 279 According to Martin Fowler\citing{etymology-refactoring}, there may also be more
 280 than one origin of the word. The most well-known source, when it comes to the
 281 origin of \emph{refactoring}, is the
 282 Smalltalk\footnote{\label{footNote}Programming language} community and their
 283 infamous \name{Refactoring
 284 Browser}\footnote{\url{http://st-www.cs.illinois.edu/users/brant/Refactory/RefactoringBrowser.html}}
 285 described in the article \tit{A Refactoring Tool for
 286 Smalltalk}\citing{refactoringBrowser1997}, published in 1997.
 287 Allegedly\citing{etymology-refactoring}, the metaphor of factoring programs was
 288 also present in the Forth\textsuperscript{\ref{footNote}} community, and the
 289 word ``refactoring'' is mentioned in a book by Leo Brodie, called \tit{Thinking
 290 Forth}\citing{brodie2004}, first published in 1984\footnote{\tit{Thinking Forth}
 291 was first published in 1984 by the \name{Forth Interest Group}.  Then it was
 292 reprinted in 1994 with minor typographical corrections, before it was
 293 transcribed into an electronic edition typeset in \LaTeX\ and published under a
 294 Creative Commons license in
 295 2004. The edition cited here is the 2004 edition, but the content should
 296 essentially be as in 1984.}. The exact word is printed in only one
 297 place~\cite[p.~232]{brodie2004}, but the term \emph{factoring} is prominent in
 298 the book, which also contains a whole chapter dedicated to (re)factoring, and how
 299 to keep the (Forth) code clean and maintainable.
 300
 301 \begin{quote}
 302   \ldots good factoring technique is perhaps the most important skill for a
 303   Forth programmer.~\cite[p.~172]{brodie2004}
 304 \end{quote}
 305
 306 \noindent Brodie also expresses what \emph{factoring} means to him:
 307
 308 \begin{quote}
 309   Factoring means organizing code into useful fragments. To make a fragment
 310   useful, you often must separate reusable parts from non-reusable parts. The
 311   reusable parts become new definitions. The non-reusable parts become arguments
 312   or parameters to the definitions.~\cite[p.~172]{brodie2004}
 313 \end{quote}
 314
 315 Fowler claims that the usage of the word \emph{refactoring} did not pass between
 316 the \name{Forth} and \name{Smalltalk} communities, but that it emerged
 317 independently in each of the communities.
 318
 319 \section{Motivation -- Why people refactor}
 320 There are many reasons why people want to refactor their programs. They can for
 321 instance do it to remove duplication, break up long methods or to introduce
 322 design patterns into their software systems. The shared trait of all these is
 323 that people's intentions are to make their programs \emph{better}, in some
 324 sense.  But what aspects of their programs are becoming improved?
 325
 326 As just mentioned, people often refactor to get rid of duplication. They are
 327 moving identical or similar code into methods, and are pushing methods up or
 328 down in their class hierarchies. They are making template methods for
 329 overlapping algorithms/functionality, and so on. It is all about gathering what
 330 belongs together and putting it all in one place. The resulting code is then
 331 easier to maintain. When removing the implicit coupling\footnote{When
 332   duplicating code, the duplicate pieces of code might not be coupled, apart
 333 from representing the same functionality. So if this functionality is going to
 334 change, it might need to change in more than one place, thus creating an
 335 implicit coupling between multiple pieces of code.} between code snippets, the
 336 location of a bug is limited to only one place, and new functionality needs only
 337 to be added to this one place, instead of a number of places people might not
 338 even remember.
 339
 340 A problem you often encounter when programming, is that a program contains a lot
 341 of long and hard-to-grasp methods. It can then help to break the methods into
 342 smaller ones, using the \gloss{extractMethod} refactoring\citing{refactoring}.
 343 Then you may discover something about a program that you were not aware of
 344 before; revealing bugs you did not know about or could not find due to the
 345 complex structure of your program. \todo{Proof?} Making the methods smaller and
 346 giving good names to the new ones clarifies the algorithms and enhances the
 347 \emph{understandability} of the program \see{magic_number_seven}. This makes
 348 refactoring an excellent method for exploring unknown program code, or code that
 349 you had forgotten that you wrote.
 350
 351 Most primitive refactorings are simple, and usually involve moving code
 352 around\citing{kerievsky2005}. The motivation behind them may first be revealed
 353 when they are combined into larger --- higher level --- refactorings, called
 354 \emph{composite refactorings} \see{compositeRefactorings}. Often the goal of
 355 such a series of refactorings is a design pattern. Thus the design can
 356 \emph{evolve} throughout the lifetime of a program, as opposed to designing
 357 up-front.  It is all about being structured and taking small steps to improve a
 358 program's design.
 359
 360 Many software design patterns are aimed at lowering the coupling between
 361 different classes and different layers of logic. One of the most famous is
 362 perhaps the \pattern{Model-View-Controller}\citing{designPatterns} pattern. It
 363 is aimed at lowering the coupling between the user interface, the business logic
 364 and the data representation of a program. This also has the added benefit that
 365 the business logic could much easier be the target of automated tests, thus
 366 increasing the productivity in the software development process.
 367
 368 Another effect of refactoring is that with the increased separation of concerns
 369 coming out of many refactorings, the \emph{performance} can be improved. When
 370 profiling programs, the problematic parts are narrowed down to smaller parts of
 371 the code, which are easier to tune, and optimization can be performed only where
 372 needed and in a more effective way\citing{refactoring}.
 373
 374 Last, but not least --- and this should probably be the best reason to refactor --- is
 375 refactoring to \emph{facilitate a program change}. If one has managed to keep
 376 one's code clean and tidy, and the code is not bloated with design patterns that
 377 are not ever going to be needed, then some refactoring might be needed to
 378 introduce a design pattern that is appropriate for the change that is going to
 379 happen.
 380
 381 Refactoring program code --- with a goal in mind --- can give the code itself
 382 more value. That is in the form of robustness to bugs, understandability and
 383 maintainability. Having robust code is an obvious advantage, but
 384 understandability and maintainability are both very important aspects of
 385 software development. By incorporating refactoring in the development process,
 386 bugs are found faster, new functionality is added more easily and code is easier
 387 to understand by the next person exposed to it, which might as well be the
 388 person who wrote it. The consequence of this, is that refactoring can increase
 389 the average productivity of the development process, and thus also add to the
 390 monetary value of a business in the long run. The perspective on productivity
 391 and money should also be able to open the eyes of the many nearsighted managers
 392 that seldom see beyond the next milestone.
 393
 394 \section{The magical number seven}\label{magic_number_seven}
 395 The article \tit{The magical number seven, plus or minus two: some limits on our
 396 capacity for processing information}\citing{miller1956} by George A.  Miller,
 397 was published in the journal \name{Psychological Review} in 1956.  It presents
 398 evidence supporting the claim that the number of objects a human being
 399 can hold in working memory is roughly seven, plus or minus two. This
 400 number varies a bit depending on the nature and complexity of the objects, but
 401 is according to Miller ``\ldots never changing so much as to be
 402 unrecognizable.''
 403
 404 Miller's article culminates in the section called \emph{Recoding}, a term he
 405 borrows from communication theory. The central result in this section is that by
 406 recoding information, the amount of information that a human can
 407 process at a time is increased. By \emph{recoding}, Miller means to group
 408 objects together in chunks, and give each chunk a new name that it can be
 409 remembered by.
 410
 411 \begin{quote}
 412   \ldots recoding is an extremely powerful weapon for increasing the amount of
 413   information that we can deal with.~\cite[p.~95]{miller1956}
 414 \end{quote}
 415
 416 By organizing objects into patterns of ever growing depth, one can memorize and
 417 process a much larger amount of data than if it were to be represented as its
 418 basic pieces. This grouping and renaming is analogous to how many refactorings
 419 work, by grouping pieces of code and giving them a new name.  Examples are the
 420 fundamental \ExtractMethod and \refa{Extract Class}
 421 refactorings\citing{refactoring}.
 422
 423 An example from the article addresses the problem of memorizing a sequence of
 424 binary digits. The example presented here is a slightly modified version of the
 425 one presented in the original article\citing{miller1956}, but it preserves the
 426 essence of it. Let us say we have the following sequence of
 427 16 binary digits: ``1010001001110011''. Most of us will have a hard time
 428 memorizing this sequence by only reading it once or twice. Imagine if we instead
 429 translate it to this sequence: ``A273''. If you have a background from computer
 430 science, it will be obvious that the latter sequence is the first sequence
 431 recoded to be represented by digits in base 16. Most people should be able to
 432 memorize this last sequence by only looking at it once.
 433
 434 Another result from the Miller article is that when the amount of information a
 435 human must interpret increases, it is crucial that the translation from one code
 436 to another must be almost automatic for the subject to be able to remember the
 437 translation, before \heshe is presented with new information to recode.  Thus
 438 learning and understanding how to best organize certain kinds of data is
 439 essential to efficiently handle that kind of data in the future. This is much
 440 like when humans learn to read. First they must learn how to recognize letters.
 441 Then they can learn distinct words, and later read sequences of words that form
 442 whole sentences. Eventually, most of them will be able to read whole books and
 443 briefly retell the important parts of their content. This suggests that the use of
 444 design patterns is a good idea when reasoning about computer programs. With
 445 extensive use of design patterns when creating complex program structures, one
 446 does not always have to read whole classes of code to comprehend how they
 447 function; it may be sufficient to only see the name of a class to almost fully
 448 understand its responsibilities.
 449
 450 \begin{quote}
 451   Our language is tremendously useful for repackaging material into a few chunks
 452   rich in information.~\cite[p.~95]{miller1956}
 453 \end{quote}
 454
 455 Without further evidence, these results at least indicate that refactoring
 456 source code into smaller units with higher cohesion and, when needed,
 457 introducing appropriate design patterns, should aid in the cause of creating
 458 computer programs that are easier to maintain and have code that is easier (and
 459 better) understood.
 460
 461 \section{Notable contributions to the refactoring literature}
 462 \todoin{Update with more contributions}
 463
 464 \begin{description}
 465   \item[1992] William F. Opdyke submits his doctoral dissertation called
 466     \tit{Refactoring Object-Oriented Frameworks}\citing{opdyke1992}. This work
 467     defines a set of refactorings, that are behavior preserving given that their
 468     preconditions are met. The dissertation is focused on the automation of
 469     refactorings.
 470   \item[1999] Martin Fowler et al.: \tit{Refactoring: Improving the Design of
 471     Existing Code}\citing{refactoring}. This is maybe the most influential text
 472     on refactoring. It bears similarities to Opdyke's thesis\citing{opdyke1992}
 473     in the way that it provides a catalog of refactorings. But Fowler's book is
 474     more about the craft of refactoring, as he focuses on establishing a
 475     vocabulary for refactoring, together with the mechanics of different
 476     refactorings and when to perform them. His methodology is also founded on
 477     the principles of test-driven development.
 478   \item[2005] Joshua Kerievsky: \tit{Refactoring to
 479     Patterns}\citing{kerievsky2005}. This book is heavily influenced by Fowler's
 480     \tit{Refactoring}\citing{refactoring} and the ``Gang of Four'' \tit{Design
 481     Patterns}\citing{designPatterns}. It is building on the refactoring
 482     catalog from Fowler's book, but is trying to bridge the gap between
 483     \emph{refactoring} and \emph{design patterns} by providing a series of
 484     higher-level composite refactorings that make code evolve toward or away
 485     from certain design patterns. The book is trying to build up the reader's
 486     intuition around \emph{why} one would want to use a particular design
 487     pattern, and not just \emph{how}. The book is encouraging evolutionary
 488     design \see{relationToDesignPatterns}.
 489 \end{description}
 490
 491 \section{Tool support (for Java)}\label{toolSupport}
 492 This section will briefly compare the refactoring support of the three IDEs
 493 \name{Eclipse}\footnote{\url{http://www.eclipse.org/}}, \name{IntelliJ
 494 IDEA}\footnote{The IDE under comparison is the \name{Community Edition},
 495 \url{http://www.jetbrains.com/idea/}} and
 496 \name{NetBeans}\footnote{\url{https://netbeans.org/}}. These are the most
 497 popular Java IDEs\citing{javaReport2011}.
 498
 499 All three IDEs provide support for the most useful refactorings, like the
 500 different extract, move and rename refactorings. In fact, Java-targeted IDEs are
 501 known for their good refactoring support, so this did not come as a big
 502 surprise.
 503
 504 The IDEs seem to have excellent support for the \ExtractMethod refactoring, so
 505 at least they have all passed the first ``refactoring
 506 rubicon''\citing{fowlerRubicon2001,secondRubicon2012}.
 507
 508 Regarding the \gloss{moveMethod} refactoring, the \name{Eclipse} and
 509 \name{IntelliJ} IDEs do the job in very similar manners. In most situations they
 510 both do a satisfying job by producing the expected outcome. But they do nothing
 511 to check that the result does not break the semantics of the program
 512 \see{correctness}.
 513 The \name{NetBeans} IDE implements this refactoring in a somewhat
 514 unsophisticated way. For starters, the refactoring's default destination for the
 515 move, is the same class as the method already resides in, although it refuses to
 516 perform the refactoring if chosen.  But the worst part is, that if moving the
 517 method \method{f} of the class \type{C} to the class \type{X}, it will break the
 518 code.  The result is shown in \myref{lst:moveMethod_NetBeans}.
 519
 520 \begin{listing}
 521 \begin{multicols}{2}
 522 \begin{minted}[samepage]{java}
 523 public class C {
 524     private X x;
 525     ...
 526     public void f() {
 527         x.m();
 528         x.n();
 529     }
 530 }
 531 \end{minted}
 532
 533 \columnbreak
 534
 535 \begin{minted}[samepage]{java}
 536 public class X {
 537     ...
 538     public void f(C c) {
 539         c.x.m();
 540         c.x.n();
 541     }
 542 }
 543 \end{minted}
 544 \end{multicols}
 545 \caption{Moving method \method{f} from \type{C} to \type{X}.}
 546 \label{lst:moveMethod_NetBeans}
 547 \end{listing}
 548
 549 \name{NetBeans} will try to create code that calls the methods \method{m} and \method{n}
 550 of \type{X} by accessing them through \var{c.x}, where \var{c} is a parameter of
 551 type \type{C} that is added to the method \method{f} when it is moved. (This is
 552 seldom the desired outcome of this refactoring, but ironically, this ``feature''
 553 keeps \name{NetBeans} from breaking the code in the example from \myref{correctness}.)
 554 If \var{c.x} for some reason is inaccessible to \type{X}, as in this case, the
 555 refactoring breaks the code, and it will not compile. \name{NetBeans} presents a
 556 preview of the refactoring outcome, but the preview does not catch it if the IDE
 557 is about to break the program.
 558
 559 The IDEs under investigation seem to have fairly good support for primitive
 560 refactorings, but what about more complex ones, such as
 561 \gloss{extractClass}\citing{refactoring}? \name{IntelliJ} handles this in a
 562 fairly good manner, although, in the case of private methods, it leaves unused
 563 methods behind. These are methods that delegate to a field with the type of the
 564 new class, but are not used anywhere. \name{Eclipse} has added its own quirk to
 565 the \refa{Extract Class} refactoring, and only allows for \emph{fields} to be
 566 moved to a new class, \emph{not methods}. This makes it effectively only
 567 extracting a data structure, and calling it \refa{Extract Class} is a little
 568 misleading.  One would often be better off with textual extract and paste than
 569 using the \refa{Extract Class} refactoring in \name{Eclipse}. When it comes to
 570 \name{NetBeans}, it does not even attempt to provide this refactoring.
 571
 572 \todoin{Visual Studio (C++/C\#), Smalltalk refactoring browser?,
 573 second refactoring rubicon?}
 574
 575 \section{The relation to design patterns}\label{relationToDesignPatterns}
 576
 577 Refactoring and design patterns have at least one thing in common: they are both
 578 promoted by advocates of \emph{clean code}\citing{cleanCode} as fundamental
 579 tools on the road to more maintainable and extendable source code.
 580
 581 \begin{quote}
 582   Design patterns help you determine how to reorganize a design, and they can
 583   reduce the amount of refactoring you need to do
 584   later.~\cite[p.~353]{designPatterns}
 585 \end{quote}
 586
 587 Although sometimes associated with
 588 over-engineering\citing{kerievsky2005,refactoring}, design patterns are in
 589 general assumed to be good for maintainability of source code.  That may be
 590 because many of them are designed to support the \emph{open/closed principle} of
 591 object-oriented programming. The principle was first formulated by Bertrand
 592 Meyer, the creator of the Eiffel programming language, like this: ``Modules
 593 should be both open and closed.''\citing{meyer1988} It has been popularized,
 594 with this as a common version:
 595
 596 \begin{quote}
 597   Software entities (classes, modules, functions, etc.) should be open for
 598   extension, but closed for modification.\footnote{See
 599     \url{http://c2.com/cgi/wiki?OpenClosedPrinciple} or
 600     \url{https://en.wikipedia.org/wiki/Open/closed_principle}}
 601 \end{quote}
 602
 603 Maintainability is often thought of as the ability to be able to introduce new
 604 functionality without having to change too much of the old code. When
 605 refactoring, the motivation is often to facilitate adding new functionality. It
 606 is about factoring the old code in a way that lets the new functionality
 607 benefit from the functionality already residing in a software system,
 608 without having to copy old code into new. Then, next time someone shall add new
 609 functionality, it is less likely that the old code has to change. Assuming that
 610 a design pattern is the best way to get rid of duplication and assist in
 611 implementing new functionality, it is reasonable to conclude that a design
 612 pattern often is the target of a series of refactorings. Having a repertoire of
 613 design patterns can also help in knowing when and how to refactor a program to
 614 make it reflect certain desired characteristics.
 615
 616 \begin{quote}
 617   There is a natural relation between patterns and refactorings. Patterns are
 618   where you want to be; refactorings are ways to get there from somewhere
 619   else.~\cite[p.~107]{refactoring}
 620 \end{quote}
 621
 622 This quote is wise in many contexts, but it is not always appropriate to say
 623 ``Patterns are where you want to be\ldots''. \emph{Sometimes}, patterns are
 624 where you want to be, but only because it will benefit your design. It is not
 625 true that one should always try to incorporate as many design patterns as
 626 possible into a program. It is not like they have intrinsic value. They only add
 627 value to a system when they support its design. Otherwise, the use of design
 628 patterns may only lead to a program that is more complex than necessary.
 629
 630 \begin{quote}
 631   The overuse of patterns tends to result from being patterns happy. We are
 632   \emph{patterns happy} when we become so enamored of patterns that we simply
 633   must use them in our code.~\cite[p.~24]{kerievsky2005}
 634 \end{quote}
 635
 636 This can easily happen when relying largely on up-front design. Then it is
 637 natural, in the very beginning, to try to build in all the flexibility that one
 638 believes will be necessary throughout the lifetime of a software system.
 639 According to Joshua Kerievsky, ``That sounds reasonable --- if you happen to be
 640 psychic.''~\cite[p.~1]{kerievsky2005} He is advocating what he believes is a
 641 better approach: To let software continually evolve. To start with a simple
 642 design that meets today's needs, and tackle future needs by refactoring to
 643 satisfy them. He believes that this is a more economical approach than investing
 644 time and money into a design that inevitably is going to change. By relying on
 645 continuously refactoring a system, its design can be made simpler without
 646 sacrificing flexibility. To be able to fully rely on this approach, it is of
 647 utmost importance to have a reliable suite of tests to lean on \see{testing}. This
 648 makes the design process more natural and less characterized by difficult
 649 decisions that have to be made before proceeding in the process, and that are
 650 going to define a project for all of its unforeseeable future.
 651
 652 \begin{comment}
 653
 654 \section{Classification of refactorings}
 655 % only interesting refactorings
 656 % with 2 detailed examples? One for structured and one for intra-method?
 657 % Is replacing Bubblesort with Quick Sort considered a refactoring?
 658
 659 \subsection{Structural refactorings}
 660
 661 \subsubsection{Primitive refactorings}
 662
 663 % Composing Methods
 664 \explanation{Extract Method}{You have a code fragment that can be grouped
 665 together.}{Turn the fragment into a method whose name explains the purpose of
 666 the method.}
 667
 668 \explanation{Inline Method}{A method's body is just as clear as its name.}{Put
 669 the method's body into the body of its callers and remove the method.}
 670
 671 \explanation{Inline Temp}{You have a temp that is assigned to once with a simple
 672 expression, and the temp is getting in the way of other refactorings.}{Replace
 673 all references to that temp with the expression}
 674
 675 % Moving Features Between Objects
 676 \explanation{Move Method}{A method is, or will be, using or used by more
 677 features of another class than the class on which it is defined.}{Create a new
 678 method with a similar body in the class it uses most. Either turn the old method
 679 into a simple delegation, or remove it altogether.}
 680
 681 \explanation{Move Field}{A field is, or will be, used by another class more than
 682 the class on which it is defined}{Create a new field in the target class, and
 683 change all its users.}
 684
 685 % Organizing Data
 686 \explanation{Replace Magic Number with Symbolic Constant}{You have a literal
 687 number with a particular meaning.}{Create a constant, name it after the meaning,
 688 and replace the number with it.}
 689
 690 \explanation{Encapsulate Field}{There is a public field.}{Make it private and
 691 provide accessors.}
 692
 693 \explanation{Replace Type Code with Class}{A class has a numeric type code that
 694 does not affect its behavior.}{Replace the number with a new class.}
 695
 696 \explanation{Replace Type Code with Subclasses}{You have an immutable type code
 697 that affects the behavior of a class.}{Replace the type code with subclasses.}
 698
 699 \explanation{Replace Type Code with State/Strategy}{You have a type code that
 700 affects the behavior of a class, but you cannot use subclassing.}{Replace the
 701 type code with a state object.}
 702
 703 % Simplifying Conditional Expressions
 704 \explanation{Consolidate Duplicate Conditional Fragments}{The same fragment of
 705 code is in all branches of a conditional expression.}{Move it outside of the
 706 expression.}
 707
 708 \explanation{Remove Control Flag}{You have a variable that is acting as a
 709 control flag for a series of boolean expressions.}{Use a break or return
 710 instead.}
 711
 712 \explanation{Replace Nested Conditional with Guard Clauses}{A method has
 713 conditional behavior that does not make clear the normal path of
 714 execution.}{Use guard clauses for all special cases.}
 715
 716 \explanation{Introduce Null Object}{You have repeated checks for a null
 717 value.}{Replace the null value with a null object.}
 718
 719 \explanation{Introduce Assertion}{A section of code assumes something about the
 720 state of the program.}{Make the assumption explicit with an assertion.}
 721
 722 % Making Method Calls Simpler
 723 \explanation{Rename Method}{The name of a method does not reveal its
 724 purpose.}{Change the name of the method}
 725
 726 \explanation{Add Parameter}{A method needs more information from its
 727 caller.}{Add a parameter for an object that can pass on this information.}
 728
 729 \explanation{Remove Parameter}{A parameter is no longer used by the method
 730 body.}{Remove it.}
 731
 732 %\explanation{Parameterize Method}{Several methods do similar things but with
 733 %different values contained in the method.}{Create one method that uses a
 734 %parameter for the different values.}
 735
 736 \explanation{Preserve Whole Object}{You are getting several values from an
 737 object and passing these values as parameters in a method call.}{Send the whole
 738 object instead.}
 739
 740 \explanation{Remove Setting Method}{A field should be set at creation time and
 741 never altered.}{Remove any setting method for that field.}
 742
 743 \explanation{Hide Method}{A method is not used by any other class.}{Make the
 744 method private.}
 745
 746 \explanation{Replace Constructor with Factory Method}{You want to do more than
 747 simple construction when you create an object}{Replace the constructor with a
 748 factory method.}
 749
 750 % Dealing with Generalization
 751 \explanation{Pull Up Field}{Two subclasses have the same field.}{Move the field
 752 to the superclass.}
 753
 754 \explanation{Pull Up Method}{You have methods with identical results on
 755 subclasses.}{Move them to the superclass.}
 756
 757 \explanation{Push Down Method}{Behavior on a superclass is relevant only for
 758 some of its subclasses.}{Move it to those subclasses.}
 759
 760 \explanation{Push Down Field}{A field is used only by some subclasses.}{Move the
 761 field to those subclasses}
 762
 763 \explanation{Extract Interface}{Several clients use the same subset of a class's
 764 interface, or two classes have part of their interfaces in common.}{Extract the
 765 subset into an interface.}
 766
 767 \explanation{Replace Inheritance with Delegation}{A subclass uses only part of a
 768 superclass's interface or does not want to inherit data.}{Create a field for the
 769 superclass, adjust methods to delegate to the superclass, and remove the
 770 subclassing.}
 771
 772 \explanation{Replace Delegation with Inheritance}{You're using delegation and
 773 are often writing many simple delegations for the entire interface}{Make the
 774 delegating class a subclass of the delegate.}
 775
 776 \subsubsection{Composite refactorings}
 777
 778 % Composing Methods
 779 % \explanation{Replace Method with Method Object}{}{}
 780
 781 % Moving Features Between Objects
 782 \explanation{Extract Class}{You have one class doing work that should be done by
 783 two}{Create a new class and move the relevant fields and methods from the old
 784 class into the new class.}
 785
 786 \explanation{Inline Class}{A class isn't doing very much.}{Move all its features
 787 into another class and delete it.}
 788
 789 \explanation{Hide Delegate}{A client is calling a delegate class of an
 790 object.}{Create Methods on the server to hide the delegate.}
 791
 792 \explanation{Remove Middle Man}{A class is doing too much simple delegation.}{Get
 793 the client to call the delegate directly.}
 794
 795 % Organizing Data
 796 \explanation{Replace Data Value with Object}{You have a data item that needs
 797 additional data or behavior.}{Turn the data item into an object.}
 798
 799 \explanation{Change Value to Reference}{You have a class with many equal
 800 instances that you want to replace with a single object.}{Turn the object into a
 801 reference object.}
 802
 803 \explanation{Encapsulate Collection}{A method returns a collection}{Make it
 804 return a read-only view and provide add/remove methods.}
 805
 806 % \explanation{Replace Array with Object}{}{}
 807
 808 \explanation{Replace Subclass with Fields}{You have subclasses that vary only in
 809 methods that return constant data.}{Change the methods to superclass fields and
 810 eliminate the subclasses.}
 811
 812 % Simplifying Conditional Expressions
 813 \explanation{Decompose Conditional}{You have a complicated conditional
 814 (if-then-else) statement.}{Extract methods from the condition, then part, and
 815 else part.}
 816
 817 \explanation{Consolidate Conditional Expression}{You have a sequence of
 818 conditional tests with the same result.}{Combine them into a single conditional
 819 expression and extract it.}
 820
 821 \explanation{Replace Conditional with Polymorphism}{You have a conditional that
 822 chooses different behavior depending on the type of an object.}{Move each leg
 823 of the conditional to an overriding method in a subclass. Make the original
 824 method abstract.}
 825
 826 % Making Method Calls Simpler
 827 \explanation{Replace Parameter with Method}{An object invokes a method, then
 828 passes the result as a parameter for a method. The receiver can also invoke this
 829 method.}{Remove the parameter and let the receiver invoke the method.}
 830
 831 \explanation{Introduce Parameter Object}{You have a group of parameters that
 832 naturally go together.}{Replace them with an object.}
 833
 834 % Dealing with Generalization
 835 \explanation{Extract Subclass}{A class has features that are used only in some
 836 instances.}{Create a subclass for that subset of features.}
 837
 838 \explanation{Extract Superclass}{You have two classes with similar
 839 features.}{Create a superclass and move the common features to the
 840 superclass.}
 841
 842 \explanation{Collapse Hierarchy}{A superclass and subclass are not very
 843 different.}{Merge them together.}
 844
 845 \explanation{Form Template Method}{You have two methods in subclasses that
 846 perform similar steps in the same order, yet the steps are different.}{Get the
 847 steps into methods with the same signature, so that the original methods become
 848 the same. Then you can pull them up.}
 849
 850
 851 \subsection{Functional refactorings}
 852
 853 \explanation{Substitute Algorithm}{You want to replace an algorithm with one
 854 that is clearer.}{Replace the body of the method with the new algorithm.}
 855
 856 \end{comment}
 857
 858 \section{The impact on software quality}
 859
 860 \subsection{What is software quality?}
 861 The term \emph{software quality} has many meanings. It all depends on the
 862 context we put it in. If we look at it with the eyes of a software developer, it
 863 usually means that the software is easily maintainable and testable, or in other
 864 words, that it is \emph{well designed}. This often correlates with the
 865 management scale, where \emph{keeping the schedule} and \emph{customer
 866 satisfaction} are at the center. From the customer's point of view, in addition to
 867 good usability, \emph{performance} and \emph{lack of bugs} are always
 868 appreciated, measurements that are also shared by the software developer. (In
 869 addition, such things as good documentation could be measured, but this is out
 870 of the scope of this document.)
 871
 872 \subsection{The impact on performance}
 873 \begin{quote}
 874   Refactoring certainly will make software go more slowly\footnote{With today's
 875   compiler optimization techniques and performance tuning of e.g.\ the Java
 876 virtual machine, the penalties of object creation and method calls are
 877 debatable.}, but it also makes the software more amenable to performance
 878 tuning.~\cite[p.~69]{refactoring}
 879 \end{quote}
 880
 881 \noindent There is a common belief that refactoring compromises performance, due
 882 to an increased degree of indirection and because polymorphism is believed to be
 883 slower than conditionals.
 884
 885 In a survey, Demeyer\citing{demeyer2002} disproves this view in the case of
 886 polymorphism. He did an experiment on what he calls ``Transform Self Type
 887 Checks'', where you introduce a new polymorphic method and a new class hierarchy
 888 to get rid of a class' type checking of a ``type attribute''. He uses this kind
 889 of transformation to represent other ways of replacing conditionals with
 890 polymorphism as well. The experiment is performed on the C++ programming
 891 language and with three different compilers and platforms. Demeyer concludes
 892 that, with compiler optimization turned on, polymorphism beats medium to
 893 large-sized if-statements and does as well as case-statements.  (In accordance with
 894 his hypothesis, due to similarities between the way C++ handles polymorphism and
 895 case-statements.)
 896
 897 \begin{quote}
 898   The interesting thing about performance is that if you analyze most programs,
 899   you find that they waste most of their time in a small fraction of the
 900   code.~\cite[p.~70]{refactoring}
 901 \end{quote}
 902
 903 \noindent So, although an increased amount of method calls could potentially
 904 slow down programs, one should avoid premature optimization and sacrificing good
 905 design, leaving the performance tuning until after \gloss{profiling} the
 906 software and having isolated the actual problem areas.
 907
 908 \section{Composite refactorings}\label{compositeRefactorings}
 909 \todo{motivation, examples, manual vs automated?, what about refactoring in a
 910 very large code base?}
 911 Generally, when thinking about refactoring, at the mechanical level, there are
 912 essentially two kinds of refactorings. There are the \emph{primitive}
 913 refactorings, and the \emph{composite} refactorings.
 914
 915 \definition{A \emph{primitive refactoring} is a refactoring that cannot be
 916 expressed in terms of other refactorings.}
 917
 918 \noindent Examples are the \refa{Pull Up Field} and \refa{Pull Up
 919 Method} refactorings\citing{refactoring}, that move members up in their class
 920 hierarchies.
 921
 922 \definition{A \emph{composite refactoring} is a refactoring that can be
 923 expressed in terms of two or more other refactorings.}
 924
 925 \noindent An example of a composite refactoring is the \refa{Extract
 926 Superclass} refactoring\citing{refactoring}. In its simplest form, it is composed
 927 of the previously described primitive refactorings, in addition to the
 928 \refa{Pull Up Constructor Body} refactoring\citing{refactoring}. It works
 929 by creating an abstract superclass that the target class(es) inherits from, then
 930 by applying \refa{Pull Up Field}, \refa{Pull Up Method} and
 931 \refa{Pull Up Constructor Body} on the members that are to be members of
 932 the new superclass. If there are multiple classes in play, their interfaces may
 933 need to be united with the help of some rename refactorings, before extracting
 934 the superclass. For an overview of the \refa{Extract Superclass}
 935 refactoring, see \myref{fig:extractSuperclass}.
 936
 937 \begin{figure}[h]
 938   \centering
 939   \includegraphics[angle=270,width=\linewidth]{extractSuperclassItalic.pdf}
 940   \caption{The Extract Superclass refactoring, with united interfaces.}
 941   \label{fig:extractSuperclass}
 942 \end{figure}
 943
 944 \section{Manual vs. automated refactorings}
 945 Refactoring is something every programmer does, even if \heshe does not know
 946 the term \emph{refactoring}. Every refinement of source code that does not alter
 947 the program's behavior is a refactoring. For small refactorings, such as
 948 \ExtractMethod, executing them manually is a manageable task, but is still prone
 949 to errors. Getting it right the first time is not easy, considering the method
 950 signature and all the other aspects of the refactoring that have to be in place.
 951
 952 Consider the renaming of classes, methods and fields. For complex programs these
 953 refactorings are almost impossible to get right.  Attacking them with textual
 954 search and replace, or even regular expressions, will fall short on these tasks.
 955 Then it is crucial to have proper tool support that can perform them
 956 automatically: tools that can parse source code and thus have semantic knowledge
 957 about which occurrences of which names belong to what construct in the program.
 958 To even try to perform one of these complex tasks manually, one would have to
 959 be very confident in the existing test suite \see{testing}.
 960
 961 \section{Correctness of refactorings}\label{correctness}
 962 For automated refactorings to be truly useful, they must show a high degree of
 963 behavior preservation.  This last sentence might seem obvious, but there are
 964 examples of refactorings in existing tools that break programs. In an ideal
 965 world, every automated refactoring would be ``complete'', in the sense that it
 966 would never break a program. In an ideal world, every program would also be free
 967 from bugs. In modern IDEs the implemented automated refactorings are working for
 968 \emph{most} cases, which is enough for making them useful.
 969
 970 I will now present an example of a \emph{corner case} where a program breaks
 971 when a refactoring is applied. The example shows an \ExtractMethod refactoring
 972 followed by a \MoveMethod refactoring that breaks a program in both the
 973 \name{Eclipse} and \name{IntelliJ} IDEs\footnote{The \name{NetBeans} IDE handles this
 974   particular situation without altering the program's behavior, mainly because
 975   its \refa{Move Method} refactoring implementation is a bit flawed in other ways
 976   \see{toolSupport}.}.  The target and the destination for the composed
 977   refactoring are shown in \myref{lst:correctnessExtractAndMove}.  Note that the
 978   method \method{m(C c)} of class \type{X} assigns to the field \var{x} of the
 979   argument \var{c} that has type \type{C}.
 980
 981 \begin{listing}[h]
 982 \begin{multicols}{2}
 983 \begin{minted}[linenos]{java}
 984 // Refactoring target
 985 public class C {
 986   public X x = new X();
 987
 988   public void f() {
 989     x.m(this);
 990     // Not the same x
 991     x.n();
 992   }
 993 }
 994 \end{minted}
 995
 996 \columnbreak
 997
 998 \begin{minted}[]{java}
 999 // Method destination
1000 public class X {
1001   public void m(C c) {
1002     c.x = new X();
1003     // If m is called from
1004     // c, then c.x no longer
1005     // equals 'this'
1006   }
1007   public void n() {}
1008 }
1009 \end{minted}
1010 \end{multicols}
1011 \caption{The target and the destination for the composition of the
1012 \refa{Extract Method} and \refa{Move Method} refactorings.}
1013 \label{lst:correctnessExtractAndMove}
1014 \end{listing}
1015
1016
1017 The refactoring sequence works by extracting lines 6 through 8 from the original
1018 class \type{C} into a method \method{f} with the statements from those lines as
1019 its method body (but with the comment left out, since it will no longer hold any
1020 meaning). The method is then moved to the class \type{X}.  The result is shown
1021 in \myref{lst:correctnessExtractAndMoveResult}.
1022
1023 Before the refactoring, the methods \method{m} and \method{n} of class \type{X}
1024 are called on different object instances (see lines 6 and 8 of the original class
1025 \type{C} in \cref{lst:correctnessExtractAndMove}). After the refactoring, they
1026 are called on the same object, and the statement on line
1027 3 of class \type{X} (in \cref{lst:correctnessExtractAndMoveResult}) no longer
1028   has the desired effect in our example. The method \method{f} of class \type{C}
1029   is now calling the method \method{f} of class \type{X} (see line 5 of class
1030   \type{C} in \cref{lst:correctnessExtractAndMoveResult}), and the program now
1031   behaves differently than before.
1032
1033 \begin{listing}[h]
1034 \begin{multicols}{2}
1035 \begin{minted}[linenos]{java}
1036 public class C {
1037     public X x = new X();
1038
1039     public void f() {
1040         x.f(this);
1041     }
1042 }
1043 \end{minted}
1044
1045 \columnbreak
1046
1047 \begin{minted}[linenos]{java}
1048 public class X {
1049     public void m(C c) {
1050         c.x = new X();
1051     }
1052     public void n() {}
1053     // Extracted and
1054     // moved method
1055     public void f(C c) {
1056         m(c);
1057         n();
1058     }
1059 }
1060 \end{minted}
1061 \end{multicols}
1062 \caption{The result of the composed refactoring.}
1063 \label{lst:correctnessExtractAndMoveResult}
1064 \end{listing}
1065
1066 The bug introduced in the previous example is of such a nature\footnote{Caused
1067   by aliasing. See \url{https://en.wikipedia.org/wiki/Aliasing_(computing)}}
1068   that it is very difficult to spot if the refactored code is not covered by
1069   tests.  It does not generate compilation errors, and will thus only result in
1070   a runtime error or corrupted data, which might be hard to detect.
1071
1072 \section{Refactoring and the importance of testing}\label{testing}
1073 \begin{quote}
1074   If you want to refactor, the essential precondition is having solid
1075   tests.\citing{refactoring}
1076 \end{quote}
1077
1078 When refactoring, there are roughly three classes of errors that can be made.
1079 The first class of errors are the ones that make the code unable to compile.
1080 These \emph{compile-time} errors are of the nicer kind. They flash up at the
1081 moment they are made (at least when using an IDE), and are usually easy to fix.
1082 The second class are the \emph{runtime} errors. Although they take a bit longer
1083 to surface, they usually manifest after some time in an illegal argument
1084 exception, null pointer exception or similar during the program execution.
1085 These kinds of errors are a bit harder to handle, but at least they will show,
1086 eventually. Then there are the \emph{behavior-changing} errors. These errors are
1087 of the worst kind. They do not show up during compilation and they do not turn
1088 on a blinking red light during runtime either. The program can seem to work
1089 perfectly fine with them in play, but the business logic can be damaged in ways
1090 that will only show up over time.
1091
1092 For discovering runtime errors and behavior changes when refactoring, it is
1093 essential to have good test coverage. Testing in this context means writing
1094 automated tests. Manual testing may have its uses, but when refactoring, it is
1095 automated unit testing that dominates. For discovering behavior changes it is
1096 especially important to have tests that cover potential problems, since these
1097 kinds of errors do not reveal themselves.
1098
1099 Unit testing is not a way to \emph{prove} that a program is correct, but it is a
1100 way to make you confident that it \emph{probably} works as desired.  In the
1101 context of test driven development (commonly known as TDD), the tests are even a
1102 way to define how the program is \emph{supposed} to work.  It is then, by
1103 definition, working if the tests are passing.
1104
1105 If the test coverage for a code base is perfect, then it should, theoretically,
1106 be risk-free to perform refactorings on it. This is why automated tests and
1107 refactoring are such a great match.
1108
1109 \subsection{Testing the code from the correctness section}
1110 The worst thing that can happen when refactoring is to introduce changes to the
1111 behavior of a program, as in the example in \myref{correctness}. This example
1112 may be trivial, but the essence is clear. The only problem with the example is
1113 that it is not clear how to create automated tests for it, without changing it
1114 in intrusive ways.
1115
1116 Unit tests, as they are known from the different \glosspl{xUnit} around, are
1117 only suitable to test the \emph{result} of isolated operations. They cannot
1118 easily (if at all) observe the \emph{history} of a program.
1119
1120 This problem is still open.
1121
1122 \todoin{Write?}
1123 \begin{comment}
1124
1125 Assuming a sequential (non-concurrent) program:
1126
1127 \begin{minted}{java}
1128 tracematch (C c, X x) {
1129   sym m before:
1130     call(* X.m(C)) && args(c) && cflow(within(C));
1131   sym n before:
1132     call(* X.n()) && target(x) && cflow(within(C));
1133   sym setCx after:
1134     set(C.x) && target(c) && !cflow(m);
1135
1136   m n
1137
1138   { assert x == c.x; }
1139 }
1140 \end{minted}
1141
1142 %\begin{minted}{java}
1143 %tracematch (X x1, X x2) {
1144 %  sym m before:
1145 %    call(* X.m(C)) && target(x1);
1146 %  sym n before:
1147 %    call(* X.n()) && target(x2);
1148 %  sym setX after:
1149 %    set(C.x) && !cflow(m) && !cflow(n);
1150 %
1151 %  m n
1152 %
1153 %  { assert x1 != x2; }
1154 %}
1155 %\end{minted}
1156 \end{comment}
1157
1158 \section{The project}
1159 The aim of this master project will be to investigate the relationship between a
1160 composite refactoring composed of the \ExtractMethod and \MoveMethod
1161 refactorings, and its impact on one or more software metrics.
1162
1163 The composition of the \ExtractMethod and \MoveMethod refactorings springs
1164 naturally out of the need to move procedures closer to the data they manipulate.
1165 This composed refactoring is not well described in the literature, but it is
1166 implemented in at least one tool called
1167 \name{CodeRush}\footnote{\url{https://help.devexpress.com/\#CodeRush/CustomDocument3519}},
1168 which is an extension for \name{MS Visual
1169 Studio}\footnote{\url{http://www.visualstudio.com/}}. In \name{CodeRush} it is called
1170 \refa{Extract Method to
1171 Type}\footnote{\url{https://help.devexpress.com/\#CodeRush/CustomDocument6710}},
1172 but I choose to call it \ExtractAndMoveMethod, since I feel it better
1173 communicates which primitive refactorings it is composed of.
1174
1175 For the metrics, I will at least measure the \metr{Coupling between object
1176 classes} (CBO) metric that is described by Chidamber and Kemerer in their
1177 article \tit{A Metrics Suite for Object Oriented
1178 Design}\citing{metricsSuite1994}.
1179
1180 The project will then consist in implementing the \ExtractAndMoveMethod
1181 refactoring, as well as executing it over a larger code base. Then the effect of
1182 the change must be measured by calculating the chosen software metrics both
1183 before and after the execution. To be able to execute the refactoring
1184 automatically I have to make it analyze code to determine the best selections to
1185 extract into new methods.
1186
1187
1188 \backmatter{}
1189 \printglossaries
1190 \printbibliography
1191 \listoftodos
1192 \end{document}