]>
Commit | Line | Data |
---|---|---|
c8088eec | 1 | \documentclass[USenglish,11pt]{ifimaster} |
571ef294 | 2 | \usepackage{import} |
7c28933b | 3 | \usepackage[utf8]{inputenc} |
9ff90080 | 4 | \usepackage[T1]{fontenc,url} |
3510e539 | 5 | \usepackage{lmodern} % using Latin Modern to be able to use bold typewriter font |
b4e539f7 | 6 | %\usepackage{mathpazo} |
9ff90080 | 7 | \urlstyle{sf} |
128adb4f | 8 | \usepackage{listings} |
58467004 | 9 | \usepackage{booktabs} |
4e468834 | 10 | \usepackage{tabularx} |
d11bcf4d EK |
11 | \usepackage{tikz} |
12 | \usepackage{tikz-qtree} | |
5e5908eb | 13 | \usetikzlibrary{shapes,snakes,trees,arrows,shadows,positioning,calc} |
ae138b9d EK |
14 | \usepackage{babel,textcomp,csquotes,ifimasterforside} |
15 | ||
16 | \usepackage{varioref} | |
79d5c2a9 | 17 | \usepackage[hidelinks]{hyperref} |
8b6b22c8 | 18 | \usepackage{cleveref} |
b4a197fd | 19 | \usepackage[xindy]{glossaries} |
ae138b9d | 20 | |
4ea21603 | 21 | \usepackage[style=alphabetic,backend=biber,doi=false,isbn=false]{biblatex} |
12c254af | 22 | \usepackage{amsthm} |
3471cd15 | 23 | \usepackage{mathtools} |
0ab2e134 EK |
24 | \usepackage{graphicx} |
25 | % use 'disable' before printing: | |
26 | \usepackage[]{todonotes} | |
0d7fbd88 EK |
27 | \usepackage{xspace} |
28 | \usepackage{he-she} | |
b289552b | 29 | \usepackage{verbatim} |
ddcea0b5 | 30 | \usepackage{minted} |
347ed677 | 31 | \usepackage{multicol} |
ddcea0b5 | 32 | \usemintedstyle{bw} |
53be7239 EK |
33 | |
34 | \def\mintedframesep{11pt} | |
35 | ||
8fae7b44 EK |
36 | \usepackage{perpage} %the perpage package |
37 | \MakePerPage{footnote} %the perpage package command | |
9ff90080 | 38 | |
6c51af15 | 39 | \theoremstyle{definition} |
12c254af | 40 | \newtheorem*{wordDef}{Definition} |
3471cd15 | 41 | \newtheorem*{theorem}{Theorem} |
12c254af | 42 | |
ddcea0b5 EK |
43 | \graphicspath{ {./figures/} } |
44 | ||
8b6b22c8 | 45 | \newcommand{\citing}[1]{~\cite{#1}} |
e123ab03 EK |
46 | %\newcommand{\myref}[1]{\cref{#1} on \cpageref{#1}} |
47 | \newcommand{\myref}[1]{\vref{#1}} | |
4306ef44 | 48 | \newcommand{\Myref}[1]{\Vref{#1}} |
8b6b22c8 | 49 | |
b4a197fd | 50 | %\newcommand{\glossref}[1]{\textsuperscript{(\glsrefentry{#1})}} |
20bcc7bf EK |
51 | %\newcommand{\gloss}[1]{\gls{#1}\glossref{#1}} |
52 | %\newcommand{\glosspl}[1]{\glspl{#1}\glossref{#1}} | |
53 | \newcommand{\gloss}[1]{\gls{#1}} | |
54 | \newcommand{\glosspl}[1]{\glspl{#1}} | |
fe0a4c48 | 55 | |
12c254af | 56 | \newcommand{\definition}[1]{\begin{wordDef}#1\end{wordDef}} |
8b6b22c8 | 57 | \newcommand{\see}[1]{(see \myref{#1})} |
137e0e7b EK |
58 | \newcommand{\explanation}[3]{\noindent\textbf{\textit{#1}}\\*\emph{When:} |
59 | #2\\*\emph{How:} #3\\*[-7px]} | |
4e135659 | 60 | |
128adb4f | 61 | %\newcommand{\type}[1]{\lstinline{#1}} |
03674629 EK |
62 | \newcommand{\code}[1]{\texttt{\textbf{#1}}} |
63 | \newcommand{\type}[1]{\code{#1}} | |
f041551b EK |
64 | \newcommand{\typeref}[1]{\footnote{\type{#1}}} |
65 | \newcommand{\typewithref}[2]{\type{#2}\typeref{#1.#2}} | |
66 | \newcommand{\method}[1]{\type{#1}} | |
67 | \newcommand{\methodref}[2]{\footnote{\type{#1}\method{\##2()}}} | |
68 | \newcommand{\methodwithref}[2]{\method{#2}\footnote{\type{#1}\method{\##2()}}} | |
3510e539 | 69 | \newcommand{\var}[1]{\type{#1}} |
9ff90080 | 70 | |
fe0a4c48 EK |
71 | \newcommand{\name}[1]{#1} |
72 | \newcommand{\tit}[1]{\emph{#1}} | |
73 | \newcommand{\refa}[1]{\emph{#1}} | |
74 | \newcommand{\pattern}[1]{\emph{#1}} | |
75 | \newcommand{\metr}[1]{\emph{#1}} | |
76 | \newcommand{\ExtractMethod}{\refa{Extract Method}\xspace} | |
77 | \newcommand{\MoveMethod}{\refa{Move Method}\xspace} | |
78 | \newcommand{\ExtractAndMoveMethod}{\refa{Extract and Move Method}\xspace} | |
b5c7bb1b | 79 | |
1c521a77 | 80 | \newcommand{\m}[1]{$#1$} |
46f52f52 | 81 | |
7125ceac | 82 | \newcommand\todoin[2][]{\todo[inline, caption={#2}, #1]{ |
4e135659 EK |
83 | \begin{minipage}{\textwidth-4pt}#2\end{minipage}}} |
84 | ||
cbc3b044 | 85 | \title{Automated Composition of Refactorings} |
b4d90424 | 86 | \subtitle{Implementing and evaluating a search-based Extract and Move Method |
01d46361 | 87 | refactoring} |
7c28933b EK |
88 | \author{Erlend Kristiansen} |
89 | ||
fe0a4c48 EK |
90 | \makeglossaries |
91 | \newglossaryentry{profiling} | |
92 | { | |
93 | name=profiling, | |
94 | description={is to run a computer program through a profiler/with a profiler | |
95 | attached} | |
96 | } | |
20bcc7bf EK |
97 | \newglossaryentry{profiler} |
98 | { | |
99 | name=profiler, | |
100 | description={A profiler is a program for analyzing performance within an | |
101 | application. It is used to analyze memory consumption, processing time and | |
60065669 | 102 | frequency of procedure calls and such} |
20bcc7bf EK |
103 | } |
104 | \newglossaryentry{xUnit} | |
105 | { | |
106 | name={xUnit framework}, | |
107 | description={An xUnit framework is a framework for writing unit tests for a | |
108 | computer program. It follows the patterns known from the JUnit framework for | |
109 | Java\citing{fowlerXunit} | |
110 | }, | |
111 | plural={xUnit frameworks} | |
112 | } | |
fe0a4c48 EK |
113 | \newglossaryentry{softwareObfuscation} |
114 | { | |
115 | name={software obfuscation}, | |
116 | description={makes source code harder to read and analyze, while preserving | |
117 | its semantics} | |
118 | } | |
119 | \newglossaryentry{extractClass} | |
120 | { | |
121 | name=\refa{Extract Class}, | |
122 | description={The \refa{Extract Class} refactoring works by creating a class, | |
123 | for then to move members from another class to that class and access them from | |
124 | the old class via a reference to the new class} | |
125 | } | |
126 | \newglossaryentry{designPattern} | |
127 | { | |
128 | name={design pattern}, | |
129 | description={A design pattern is a named abstraction, that is meant to solve a | |
130 | general design problem. It describes the key aspects of a common problem and | |
131 | identifies its participators and how they collaborate}, | |
132 | plural={design patterns} | |
133 | } | |
1c521a77 EK |
134 | \newglossaryentry{enclosingClass} |
135 | { | |
136 | name={enclosing class}, | |
137 | description={An enclosing class is the class that surrounds any specific piece | |
138 | of code that is written in the inner scope of this class}, | |
139 | } | |
81cf9554 EK |
140 | \newglossaryentry{mementoPattern} |
141 | { | |
142 | name={memento pattern}, | |
143 | description={The memento pattern is a software design pattern that is used to | |
144 | capture an object's internal state so that it can be restored to this state | |
145 | later\citing{designPatterns}}, | |
146 | } | |
b4a197fd EK |
147 | %\newglossaryentry{extractMethod} |
148 | %{ | |
149 | % name=\refa{Extract Method}, | |
150 | % description={The \refa{Extract Method} refactoring is used to extract a | |
151 | %fragment of code from its context and into a new method. A call to the new | |
152 | %method is inlined where the fragment was before. It is used to break code into | |
153 | %logical units, with names that explain their purpose} | |
154 | %} | |
155 | %\newglossaryentry{moveMethod} | |
156 | %{ | |
157 | % name=\refa{Move Method}, | |
158 | % description={The \refa{Move Method} refactoring is used to move a method from | |
159 | % one class to another. This is useful if the method is using more features of | |
160 | % another class than of the class which it is currently defined. Then all calls | |
161 | % to this method must be updated, or the method must be copied, with the old | |
162 | %method delegating to the new method} | |
163 | %} | |
f5fb40e4 | 164 | |
7c28933b | 165 | \bibliography{bibliography/master-thesis-erlenkr-bibliography} |
6fabec9b EK |
166 | \DefineBibliographyStrings{english}{% |
167 | bibliography = {References}, | |
168 | } | |
9ff90080 | 169 | |
c8088eec EK |
170 | % UML comment in TikZ: |
171 | % ref: https://tex.stackexchange.com/questions/103688/folded-paper-shape-tikz | |
43ca7be4 EK |
172 | \makeatletter |
173 | \pgfdeclareshape{umlcomment}{ | |
174 | \inheritsavedanchors[from=rectangle] % this is nearly a rectangle | |
175 | \inheritanchorborder[from=rectangle] | |
176 | \inheritanchor[from=rectangle]{center} | |
177 | \inheritanchor[from=rectangle]{north} | |
178 | \inheritanchor[from=rectangle]{south} | |
179 | \inheritanchor[from=rectangle]{west} | |
180 | \inheritanchor[from=rectangle]{east} | |
181 | % ... and possibly more | |
182 | \backgroundpath{% this is new | |
183 | % store lower right in xa/ya and upper right in xb/yb | |
184 | \southwest \pgf@xa=\pgf@x \pgf@ya=\pgf@y | |
185 | \northeast \pgf@xb=\pgf@x \pgf@yb=\pgf@y | |
186 | % compute corner of ‘‘flipped page’’ | |
187 | \pgf@xc=\pgf@xb \advance\pgf@xc by-10pt % this should be a parameter | |
188 | \pgf@yc=\pgf@yb \advance\pgf@yc by-10pt | |
189 | % construct main path | |
190 | \pgfpathmoveto{\pgfpoint{\pgf@xa}{\pgf@ya}} | |
191 | \pgfpathlineto{\pgfpoint{\pgf@xa}{\pgf@yb}} | |
192 | \pgfpathlineto{\pgfpoint{\pgf@xc}{\pgf@yb}} | |
193 | \pgfpathlineto{\pgfpoint{\pgf@xb}{\pgf@yc}} | |
194 | \pgfpathlineto{\pgfpoint{\pgf@xb}{\pgf@ya}} | |
195 | \pgfpathclose | |
196 | % add little corner | |
197 | \pgfpathmoveto{\pgfpoint{\pgf@xc}{\pgf@yb}} | |
198 | \pgfpathlineto{\pgfpoint{\pgf@xc}{\pgf@yc}} | |
199 | \pgfpathlineto{\pgfpoint{\pgf@xb}{\pgf@yc}} | |
200 | \pgfpathlineto{\pgfpoint{\pgf@xc}{\pgf@yc}} | |
201 | } | |
202 | } | |
203 | \makeatother | |
204 | ||
205 | \tikzstyle{comment}=[% | |
206 | draw, | |
207 | drop shadow, | |
208 | fill=white, | |
209 | align=center, | |
210 | shape=document, | |
211 | minimum width=20mm, | |
212 | minimum height=10mm, | |
213 | shape=umlcomment, | |
214 | inner sep=2ex, | |
215 | font=\ttfamily, | |
216 | ] | |
217 | ||
f5fb40e4 EK |
218 | %\interfootnotelinepenalty=10000 |
219 | ||
c9488804 EK |
220 | % Space between table rows |
221 | \renewcommand{\arraystretch}{1.3} | |
6fabec9b | 222 | % Multicolumns |
bc7b5d67 | 223 | \newcommand{\spancols}[2]{\multicolumn{#1}{@{}l@{}}{#2}} |
6fabec9b EK |
224 | % Column types |
225 | \newcolumntype{L}[1]{>{\hsize=#1\hsize\raggedright\arraybackslash}X}% | |
226 | \newcolumntype{R}[1]{>{\hsize=#1\hsize\raggedleft\arraybackslash}X}% | |
c9488804 | 227 | |
9ff90080 | 228 | \begin{document} |
0cbeeadb EK |
229 | %\pagenumbering{arabic} |
230 | \mainmatter | |
531c4132 | 231 | \ififorside |
0cbeeadb | 232 | %\frontmatter{} |
9ff90080 | 233 | |
0cbeeadb | 234 | %\setcounter{page}{3} |
9ff90080 EK |
235 | |
236 | \chapter*{Abstract} | |
064227e3 EK |
237 | \todoin{\textbf{Remove all todos (including list) before delivery/printing!!! |
238 | Can be done by removing ``draft'' from documentclass.}} | |
889ba93e | 239 | \todoin{Write abstract} |
9ff90080 EK |
240 | |
241 | \tableofcontents{} | |
242 | \listoffigures{} | |
243 | \listoftables{} | |
244 | ||
0cbeeadb EK |
245 | %\mainmatter |
246 | %\setcounter{page}{13} | |
247 | ||
af21f437 EK |
248 | \chapter{Introduction} |
249 | \section{Motivation and structure} | |
00aa0588 | 250 | |
b4d90424 | 251 | \section{What is refactoring?} |
7c28933b | 252 | |
f3a108c3 EK |
253 | This question is best answered by first defining the concept of a |
254 | \emph{refactoring}, what it is to \emph{refactor}, and then discuss what aspects | |
a1bafe90 | 255 | of programming make people want to refactor their code. |
00aa0588 | 256 | |
b4d90424 | 257 | \subsection{Defining refactoring} |
a1bafe90 | 258 | Martin Fowler, in his classic book on refactoring\citing{refactoring}, defines a |
00aa0588 | 259 | refactoring like this: |
ee45c41f | 260 | |
00aa0588 | 261 | \begin{quote} |
aa1e3779 EK |
262 | \emph{Refactoring} (noun): a change made to the internal |
263 | structure\footnote{The structure observable by the programmer.} of software to | |
264 | make it easier to understand and cheaper to modify without changing its | |
265 | observable behavior.~\cite[p.~53]{refactoring} | |
00aa0588 | 266 | \end{quote} |
ee45c41f | 267 | |
a1bafe90 | 268 | \noindent This definition assigns additional meaning to the word |
ee45c41f EK |
269 | \emph{refactoring}, beyond the composition of the prefix \emph{re-}, usually |
270 | meaning something like ``again'' or ``anew'', and the word \emph{factoring}, | |
6c51af15 EK |
271 | that can mean to isolate the \emph{factors} of something. Here a \emph{factor} |
272 | would be close to the mathematical definition of something that divides a | |
273 | quantity, without leaving a remainder. Fowler is mixing the \emph{motivation} | |
a1bafe90 EK |
274 | behind refactoring into his definition. Instead it could be more refined, formed |
275 | to only consider the \emph{mechanical} and \emph{behavioral} aspects of | |
276 | refactoring. That is to factor the program again, putting it together in a | |
277 | different way than before, while preserving the behavior of the program. An | |
278 | alternative definition could then be: | |
6c51af15 EK |
279 | |
280 | \definition{A \emph{refactoring} is a transformation | |
8fae7b44 | 281 | done to a program without altering its external behavior.} |
00aa0588 | 282 | |
ee1d883a EK |
283 | From this we can conclude that a refactoring primarily changes how the |
284 | \emph{code} of a program is perceived by the \emph{programmer}, and not the | |
740e1b6c EK |
285 | \emph{behavior} experienced by any user of the program. Although the logical |
286 | meaning is preserved, such changes could potentially alter the program's | |
287 | behavior when it comes to performance gain or -penalties. So any logic depending | |
288 | on the performance of a program could make the program behave differently after | |
289 | a refactoring. | |
00aa0588 | 290 | |
fe0a4c48 EK |
291 | In the extreme case one could argue that \gloss{softwareObfuscation} is |
292 | refactoring. It is often used to protect proprietary software. It restrains | |
293 | uninvited viewers, so they have a hard time analyzing code that they are not | |
294 | supposed to know how works. This could be a problem when using a language that | |
295 | is possible to decompile, such as Java. | |
b4e539f7 EK |
296 | |
297 | Obfuscation could be done composing many, more or less randomly chosen, | |
298 | refactorings. Then the question arises whether it can be called a | |
299 | \emph{composite refactoring} or not \see{compositeRefactorings}? The answer is | |
300 | not obvious. First, there is no way to describe the mechanics of software | |
301 | obfuscation, because there are infinitely many ways to do that. Second, | |
302 | obfuscation can be thought of as \emph{one operation}: Either the code is | |
303 | obfuscated, or it is not. Third, it makes no sense to call software obfuscation | |
304 | \emph{a refactoring}, since it holds different meaning to different people. | |
305 | ||
306 | This last point is important, since one of the motivations behind defining | |
307 | different refactorings, is to establish a \emph{vocabulary} for software | |
308 | professionals to use when reasoning about and discussing programs, similar to | |
fe0a4c48 | 309 | the motivation behind \glosspl{designPattern}\citing{designPatterns}. |
b4e539f7 EK |
310 | \begin{comment} |
311 | So for describing \emph{software obfuscation}, it might be more appropriate to | |
312 | define what you do when performing it rather than precisely defining its | |
313 | mechanics in terms of other refactorings. | |
314 | \end{comment} | |
00aa0588 | 315 | |
b4d90424 | 316 | \subsection{The etymology of 'refactoring'} |
f3a108c3 EK |
317 | It is a little difficult to pinpoint the exact origin of the word |
318 | ``refactoring'', as it seems to have evolved as part of a colloquial | |
319 | terminology, more than a scientific term. There is no authoritative source for a | |
320 | formal definition of it. | |
321 | ||
b5c7bb1b | 322 | According to Martin Fowler\citing{etymology-refactoring}, there may also be more |
f3a108c3 | 323 | than one origin of the word. The most well-known source, when it comes to the |
b4e539f7 EK |
324 | origin of \emph{refactoring}, is the |
325 | Smalltalk\footnote{\label{footNote}Programming language} community and their | |
fe0a4c48 | 326 | infamous \name{Refactoring |
f3a108c3 | 327 | Browser}\footnote{\url{http://st-www.cs.illinois.edu/users/brant/Refactory/RefactoringBrowser.html}} |
fe0a4c48 | 328 | described in the article \tit{A Refactoring Tool for |
b5c7bb1b EK |
329 | Smalltalk}\citing{refactoringBrowser1997}, published in 1997. |
330 | Allegedly\citing{etymology-refactoring}, the metaphor of factoring programs was | |
b4e539f7 | 331 | also present in the Forth\textsuperscript{\ref{footNote}} community, and the |
fe0a4c48 EK |
332 | word ``refactoring'' is mentioned in a book by Leo Brodie, called \tit{Thinking |
333 | Forth}\citing{brodie2004}, first published in 1984\footnote{\tit{Thinking Forth} | |
334 | was first published in 1984 by the \name{Forth Interest Group}. Then it was | |
335 | reprinted in 1994 with minor typographical corrections, before it was | |
b4e539f7 EK |
336 | transcribed into an electronic edition typeset in \LaTeX\ and published under a |
337 | Creative Commons licence in | |
338 | 2004. The edition cited here is the 2004 edition, but the content should | |
339 | essentially be as in 1984.}. The exact word is only printed one | |
340 | place~\cite[p.~232]{brodie2004}, but the term \emph{factoring} is prominent in | |
341 | the book, that also contains a whole chapter dedicated to (re)factoring, and how | |
342 | to keep the (Forth) code clean and maintainable. | |
ee45c41f | 343 | |
f3a108c3 EK |
344 | \begin{quote} |
345 | \ldots good factoring technique is perhaps the most important skill for a | |
3a154bb7 | 346 | Forth programmer.~\cite[p.~172]{brodie2004} |
f3a108c3 | 347 | \end{quote} |
ee45c41f EK |
348 | |
349 | \noindent Brodie also express what \emph{factoring} means to him: | |
350 | ||
f3a108c3 EK |
351 | \begin{quote} |
352 | Factoring means organizing code into useful fragments. To make a fragment | |
353 | useful, you often must separate reusable parts from non-reusable parts. The | |
354 | reusable parts become new definitions. The non-reusable parts become arguments | |
3a154bb7 | 355 | or parameters to the definitions.~\cite[p.~172]{brodie2004} |
f3a108c3 EK |
356 | \end{quote} |
357 | ||
358 | Fowler claims that the usage of the word \emph{refactoring} did not pass between | |
fe0a4c48 | 359 | the \name{Forth} and \name{Smalltalk} communities, but that it emerged |
f3a108c3 EK |
360 | independently in each of the communities. |
361 | ||
b4d90424 | 362 | \subsection{Reasons for refactoring} |
2f6e6dec EK |
363 | There are many reasons why people want to refactor their programs. They can for |
364 | instance do it to remove duplication, break up long methods or to introduce | |
b4e539f7 EK |
365 | design patterns into their software systems. The shared trait for all these are |
366 | that peoples' intentions are to make their programs \emph{better}, in some | |
367 | sense. But what aspects of their programs are becoming improved? | |
368 | ||
369 | As just mentioned, people often refactor to get rid of duplication. They are | |
370 | moving identical or similar code into methods, and are pushing methods up or | |
371 | down in their class hierarchies. They are making template methods for | |
372 | overlapping algorithms/functionality, and so on. It is all about gathering what | |
373 | belongs together and putting it all in one place. The resulting code is then | |
374 | easier to maintain. When removing the implicit coupling\footnote{When | |
375 | duplicating code, the duplicate pieces of code might not be coupled, apart | |
376 | from representing the same functionality. So if this functionality is going to | |
377 | change, it might need to change in more than one place, thus creating an | |
378 | implicit coupling between multiple pieces of code.} between code snippets, the | |
137e0e7b | 379 | location of a bug is limited to only one place, and new functionality need only |
a1bafe90 EK |
380 | to be added to this one place, instead of a number of places people might not |
381 | even remember. | |
382 | ||
383 | A problem you often encounter when programming, is that a program contains a lot | |
384 | of long and hard-to-grasp methods. It can then help to break the methods into | |
b4a197fd EK |
385 | smaller ones, using the \ExtractMethod refactoring\citing{refactoring}. Then |
386 | you may discover something about a program that you were not aware of before; | |
387 | revealing bugs you did not know about or could not find due to the complex | |
078b1e4a EK |
388 | structure of your program. Making the methods smaller and giving good names to |
389 | the new ones clarifies the algorithms and enhances the \emph{understandability} | |
390 | of the program \see{magic_number_seven}. This makes refactoring an excellent | |
391 | method for exploring unknown program code, or code that you had forgotten that | |
392 | you wrote. | |
a1bafe90 | 393 | |
b4e539f7 EK |
394 | Most primitive refactorings are simple, and usually involves moving code |
395 | around\citing{kerievsky2005}. The motivation behind them may first be revealed | |
396 | when they are combined into larger --- higher level --- refactorings, called | |
a1bafe90 | 397 | \emph{composite refactorings} \see{compositeRefactorings}. Often the goal of |
b4e539f7 EK |
398 | such a series of refactorings is a design pattern. Thus the design can |
399 | \emph{evolve} throughout the lifetime of a program, as opposed to designing | |
400 | up-front. It is all about being structured and taking small steps to improve a | |
401 | program's design. | |
a1bafe90 EK |
402 | |
403 | Many software design pattern are aimed at lowering the coupling between | |
404 | different classes and different layers of logic. One of the most famous is | |
fe0a4c48 EK |
405 | perhaps the \pattern{Model-View-Controller}\citing{designPatterns} pattern. It |
406 | is aimed at lowering the coupling between the user interface, the business logic | |
b4e539f7 EK |
407 | and the data representation of a program. This also has the added benefit that |
408 | the business logic could much easier be the target of automated tests, thus | |
409 | increasing the productivity in the software development process. | |
0b0567f2 EK |
410 | |
411 | Another effect of refactoring is that with the increased separation of concerns | |
a1bafe90 EK |
412 | coming out of many refactorings, the \emph{performance} can be improved. When |
413 | profiling programs, the problematic parts are narrowed down to smaller parts of | |
414 | the code, which are easier to tune, and optimization can be performed only where | |
b4e539f7 | 415 | needed and in a more effective way\citing{refactoring}. |
137e0e7b | 416 | |
b01d328a EK |
417 | Last, but not least, and this should probably be the best reason to refactor, is |
418 | to refactor to \emph{facilitate a program change}. If one has managed to keep | |
419 | one's code clean and tidy, and the code is not bloated with design patterns that | |
a1bafe90 | 420 | are not ever going to be needed, then some refactoring might be needed to |
b01d328a EK |
421 | introduce a design pattern that is appropriate for the change that is going to |
422 | happen. | |
423 | ||
137e0e7b EK |
424 | Refactoring program code --- with a goal in mind --- can give the code itself |
425 | more value. That is in the form of robustness to bugs, understandability and | |
a1bafe90 EK |
426 | maintainability. Having robust code is an obvious advantage, but |
427 | understandability and maintainability are both very important aspects of | |
428 | software development. By incorporating refactoring in the development process, | |
429 | bugs are found faster, new functionality is added more easily and code is easier | |
430 | to understand by the next person exposed to it, which might as well be the | |
431 | person who wrote it. The consequence of this, is that refactoring can increase | |
432 | the average productivity of the development process, and thus also add to the | |
433 | monetary value of a business in the long run. The perspective on productivity | |
434 | and money should also be able to open the eyes of the many nearsighted managers | |
435 | that seldom see beyond the next milestone. | |
137e0e7b | 436 | |
b4d90424 | 437 | \subsection{The magical number seven}\label{magic_number_seven} |
fe0a4c48 EK |
438 | The article \tit{The magical number seven, plus or minus two: some limits on our |
439 | capacity for processing information}\citing{miller1956} by George A. Miller, | |
440 | was published in the journal \name{Psychological Review} in 1956. It presents | |
441 | evidence that support that the capacity of the number of objects a human being | |
442 | can hold in its working memory is roughly seven, plus or minus two objects. This | |
443 | number varies a bit depending on the nature and complexity of the objects, but | |
444 | is according to Miller ``\ldots never changing so much as to be | |
f4cea2d6 EK |
445 | unrecognizable.'' |
446 | ||
447 | Miller's article culminates in the section called \emph{Recoding}, a term he | |
448 | borrows from communication theory. The central result in this section is that by | |
449 | recoding information, the capacity of the amount of information that a human can | |
450 | process at a time is increased. By \emph{recoding}, Miller means to group | |
b4e539f7 EK |
451 | objects together in chunks, and give each chunk a new name that it can be |
452 | remembered by. | |
f4cea2d6 EK |
453 | |
454 | \begin{quote} | |
455 | \ldots recoding is an extremely powerful weapon for increasing the amount of | |
4cb06723 | 456 | information that we can deal with.~\cite[p.~95]{miller1956} |
f4cea2d6 | 457 | \end{quote} |
ee45c41f | 458 | |
b4e539f7 EK |
459 | By organizing objects into patterns of ever growing depth, one can memorize and |
460 | process a much larger amount of data than if it were to be represented as its | |
461 | basic pieces. This grouping and renaming is analogous to how many refactorings | |
462 | work, by grouping pieces of code and give them a new name. Examples are the | |
fe0a4c48 | 463 | fundamental \ExtractMethod and \refa{Extract Class} |
b4e539f7 EK |
464 | refactorings\citing{refactoring}. |
465 | ||
a1bafe90 | 466 | An example from the article addresses the problem of memorizing a sequence of |
b4e539f7 EK |
467 | binary digits. The example presented here is a slightly modified version of the |
468 | one presented in the original article\citing{miller1956}, but it preserves the | |
3ab3e132 | 469 | essence of it. Let us say we have the following sequence of |
f4cea2d6 EK |
470 | 16 binary digits: ``1010001001110011''. Most of us will have a hard time |
471 | memorizing this sequence by only reading it once or twice. Imagine if we instead | |
472 | translate it to this sequence: ``A273''. If you have a background from computer | |
b4e539f7 EK |
473 | science, it will be obvious that the latter sequence is the first sequence |
474 | recoded to be represented by digits in base 16. Most people should be able to | |
f4cea2d6 EK |
475 | memorize this last sequence by only looking at it once. |
476 | ||
477 | Another result from the Miller article is that when the amount of information a | |
478 | human must interpret increases, it is crucial that the translation from one code | |
479 | to another must be almost automatic for the subject to be able to remember the | |
0d7fbd88 | 480 | translation, before \heshe is presented with new information to recode. Thus |
f4cea2d6 EK |
481 | learning and understanding how to best organize certain kinds of data is |
482 | essential to efficiently handle that kind of data in the future. This is much | |
a1bafe90 EK |
483 | like when humans learn to read. First they must learn how to recognize letters. |
484 | Then they can learn distinct words, and later read sequences of words that form | |
485 | whole sentences. Eventually, most of them will be able to read whole books and | |
486 | briefly retell the important parts of its content. This suggest that the use of | |
b4e539f7 EK |
487 | design patterns is a good idea when reasoning about computer programs. With |
488 | extensive use of design patterns when creating complex program structures, one | |
489 | does not always have to read whole classes of code to comprehend how they | |
490 | function, it may be sufficient to only see the name of a class to almost fully | |
491 | understand its responsibilities. | |
f4cea2d6 EK |
492 | |
493 | \begin{quote} | |
494 | Our language is tremendously useful for repackaging material into a few chunks | |
4cb06723 | 495 | rich in information.~\cite[p.~95]{miller1956} |
f4cea2d6 | 496 | \end{quote} |
ee45c41f | 497 | |
a1bafe90 | 498 | Without further evidence, these results at least indicate that refactoring |
f4cea2d6 EK |
499 | source code into smaller units with higher cohesion and, when needed, |
500 | introducing appropriate design patterns, should aid in the cause of creating | |
b4e539f7 | 501 | computer programs that are easier to maintain and have code that is easier (and |
f4cea2d6 EK |
502 | better) understood. |
503 | ||
b4d90424 | 504 | \subsection{Notable contributions to the refactoring literature} |
36d99783 | 505 | |
d21ef41f EK |
506 | \begin{description} |
507 | \item[1992] William F. Opdyke submits his doctoral dissertation called | |
fe0a4c48 EK |
508 | \tit{Refactoring Object-Oriented Frameworks}\citing{opdyke1992}. This work |
509 | defines a set of refactorings, that are behavior preserving given that their | |
510 | preconditions are met. The dissertation is focused on the automation of | |
511 | refactorings. | |
512 | \item[1999] Martin Fowler et al.: \tit{Refactoring: Improving the Design of | |
b5c7bb1b EK |
513 | Existing Code}\citing{refactoring}. This is maybe the most influential text |
514 | on refactoring. It bares similarities with Opdykes thesis\citing{opdyke1992} | |
d21ef41f EK |
515 | in the way that it provides a catalog of refactorings. But Fowler's book is |
516 | more about the craft of refactoring, as he focuses on establishing a | |
517 | vocabulary for refactoring, together with the mechanics of different | |
518 | refactorings and when to perform them. His methodology is also founded on | |
fe0a4c48 EK |
519 | the principles of test-driven development. |
520 | \item[2005] Joshua Kerievsky: \tit{Refactoring to | |
36d99783 | 521 | Patterns}\citing{kerievsky2005}. This book is heavily influenced by Fowler's |
fe0a4c48 | 522 | \tit{Refactoring}\citing{refactoring} and the ``Gang of Four'' \tit{Design |
36d99783 EK |
523 | Patterns}\citing{designPatterns}. It is building on the refactoring |
524 | catalogue from Fowler's book, but is trying to bridge the gap between | |
525 | \emph{refactoring} and \emph{design patterns} by providing a series of | |
526 | higher-level composite refactorings, that makes code evolve toward or away | |
fe0a4c48 | 527 | from certain design patterns. The book is trying to build up the reader's |
36d99783 EK |
528 | intuition around \emph{why} one would want to use a particular design |
529 | pattern, and not just \emph{how}. The book is encouraging evolutionary | |
e123ab03 | 530 | design \see{relationToDesignPatterns}. |
d21ef41f | 531 | \end{description} |
3b7c1d90 | 532 | |
b4d90424 | 533 | \subsection{Tool support (for Java)}\label{toolSupport} |
3ab3e132 | 534 | This section will briefly compare the refactoring support of the three IDEs |
fe0a4c48 EK |
535 | \name{Eclipse}\footnote{\url{http://www.eclipse.org/}}, \name{IntelliJ |
536 | IDEA}\footnote{The IDE under comparison is the \name{Community Edition}, | |
4e135659 | 537 | \url{http://www.jetbrains.com/idea/}} and |
fe0a4c48 | 538 | \name{NetBeans}\footnote{\url{https://netbeans.org/}}. These are the most |
4e135659 EK |
539 | popular Java IDEs\citing{javaReport2011}. |
540 | ||
541 | All three IDEs provide support for the most useful refactorings, like the | |
542 | different extract, move and rename refactorings. In fact, Java-targeted IDEs are | |
543 | known for their good refactoring support, so this did not appear as a big | |
544 | surprise. | |
545 | ||
546 | The IDEs seem to have excellent support for the \ExtractMethod refactoring, so | |
b4e539f7 EK |
547 | at least they have all passed the first ``refactoring |
548 | rubicon''\citing{fowlerRubicon2001,secondRubicon2012}. | |
4e135659 | 549 | |
b4a197fd EK |
550 | Regarding the \MoveMethod refactoring, the \name{Eclipse} and \name{IntelliJ} |
551 | IDEs do the job in very similar manners. In most situations they both do a | |
552 | satisfying job by producing the expected outcome. But they do nothing to check | |
553 | that the result does not break the semantics of the program \see{correctness}. | |
fe0a4c48 | 554 | The \name{NetBeans} IDE implements this refactoring in a somewhat |
b4e539f7 EK |
555 | unsophisticated way. For starters, the refactoring's default destination for the |
556 | move, is the same class as the method already resides in, although it refuses to | |
557 | perform the refactoring if chosen. But the worst part is, that if moving the | |
558 | method \method{f} of the class \type{C} to the class \type{X}, it will break the | |
559 | code. The result is shown in \myref{lst:moveMethod_NetBeans}. | |
4e135659 | 560 | |
347ed677 EK |
561 | \begin{listing} |
562 | \begin{multicols}{2} | |
4e135659 EK |
563 | \begin{minted}[samepage]{java} |
564 | public class C { | |
565 | private X x; | |
566 | ... | |
567 | public void f() { | |
568 | x.m(); | |
569 | x.n(); | |
570 | } | |
571 | } | |
572 | \end{minted} | |
573 | ||
347ed677 | 574 | \columnbreak |
4e135659 EK |
575 | |
576 | \begin{minted}[samepage]{java} | |
577 | public class X { | |
578 | ... | |
579 | public void f(C c) { | |
580 | c.x.m(); | |
581 | c.x.n(); | |
582 | } | |
583 | } | |
584 | \end{minted} | |
347ed677 EK |
585 | \end{multicols} |
586 | \caption{Moving method \method{f} from \type{C} to \type{X}.} | |
587 | \label{lst:moveMethod_NetBeans} | |
588 | \end{listing} | |
4e135659 | 589 | |
fe0a4c48 | 590 | \name{NetBeans} will try to create code that call the methods \method{m} and \method{n} |
4e135659 | 591 | of \type{X} by accessing them through \var{c.x}, where \var{c} is a parameter of |
a1bafe90 EK |
592 | type \type{C} that is added the method \method{f} when it is moved. (This is |
593 | seldom the desired outcome of this refactoring, but ironically, this ``feature'' | |
fe0a4c48 | 594 | keeps \name{NetBeans} from breaking the code in the example from \myref{correctness}.) |
8b6b22c8 | 595 | If \var{c.x} for some reason is inaccessible to \type{X}, as in this case, the |
fe0a4c48 | 596 | refactoring breaks the code, and it will not compile. \name{NetBeans} presents a |
8b6b22c8 EK |
597 | preview of the refactoring outcome, but the preview does not catch it if the IDE |
598 | is about break the program. | |
4778044b | 599 | |
b4e539f7 | 600 | The IDEs under investigation seem to have fairly good support for primitive |
fe0a4c48 EK |
601 | refactorings, but what about more complex ones, such as |
602 | \gloss{extractClass}\citing{refactoring}? \name{IntelliJ} handles this in a | |
603 | fairly good manner, although, in the case of private methods, it leaves unused | |
a1bafe90 | 604 | methods behind. These are methods that delegate to a field with the type of the |
fe0a4c48 EK |
605 | new class, but are not used anywhere. \name{Eclipse} has added its own quirk to |
606 | the \refa{Extract Class} refactoring, and only allows for \emph{fields} to be | |
607 | moved to a new class, \emph{not methods}. This makes it effectively only | |
608 | extracting a data structure, and calling it \refa{Extract Class} is a little | |
b4e539f7 | 609 | misleading. One would often be better off with textual extract and paste than |
fe0a4c48 EK |
610 | using the \refa{Extract Class} refactoring in \name{Eclipse}. When it comes to |
611 | \name{NetBeans}, it does not even show an attempt on providing this refactoring. | |
4778044b | 612 | |
b4d90424 | 613 | \subsection{The relation to design patterns}\label{relationToDesignPatterns} |
4cb06723 | 614 | |
fe0a4c48 EK |
615 | Refactoring and design patterns have at least one thing in common, they are both |
616 | promoted by advocates of \emph{clean code}\citing{cleanCode} as fundamental | |
617 | tools on the road to more maintainable and extendable source code. | |
4cb06723 EK |
618 | |
619 | \begin{quote} | |
620 | Design patterns help you determine how to reorganize a design, and they can | |
621 | reduce the amount of refactoring you need to do | |
622 | later.~\cite[p.~353]{designPatterns} | |
623 | \end{quote} | |
624 | ||
a1bafe90 EK |
625 | Although sometimes associated with |
626 | over-engineering\citing{kerievsky2005,refactoring}, design patterns are in | |
627 | general assumed to be good for maintainability of source code. That may be | |
d1adbeef EK |
628 | because many of them are designed to support the \emph{open/closed principle} of |
629 | object-oriented programming. The principle was first formulated by Bertrand | |
630 | Meyer, the creator of the Eiffel programming language, like this: ``Modules | |
631 | should be both open and closed.''\citing{meyer1988} It has been popularized, | |
632 | with this as a common version: | |
633 | ||
634 | \begin{quote} | |
635 | Software entities (classes, modules, functions, etc.) should be open for | |
636 | extension, but closed for modification.\footnote{See | |
637 | \url{http://c2.com/cgi/wiki?OpenClosedPrinciple} or | |
638 | \url{https://en.wikipedia.org/wiki/Open/closed_principle}} | |
639 | \end{quote} | |
640 | ||
641 | Maintainability is often thought of as the ability to be able to introduce new | |
a1bafe90 | 642 | functionality without having to change too much of the old code. When |
d1adbeef EK |
643 | refactoring, the motivation is often to facilitate adding new functionality. It |
644 | is about factoring the old code in a way that makes the new functionality being | |
645 | able to benefit from the functionality already residing in a software system, | |
646 | without having to copy old code into new. Then, next time someone shall add new | |
a1bafe90 EK |
647 | functionality, it is less likely that the old code has to change. Assuming that |
648 | a design pattern is the best way to get rid of duplication and assist in | |
649 | implementing new functionality, it is reasonable to conclude that a design | |
650 | pattern often is the target of a series of refactorings. Having a repertoire of | |
651 | design patterns can also help in knowing when and how to refactor a program to | |
652 | make it reflect certain desired characteristics. | |
d1adbeef EK |
653 | |
654 | \begin{quote} | |
a1bafe90 | 655 | There is a natural relation between patterns and refactorings. Patterns are |
d1adbeef EK |
656 | where you want to be; refactorings are ways to get there from somewhere |
657 | else.~\cite[p.~107]{refactoring} | |
658 | \end{quote} | |
659 | ||
660 | This quote is wise in many contexts, but it is not always appropriate to say | |
a1bafe90 EK |
661 | ``Patterns are where you want to be\ldots''. \emph{Sometimes}, patterns are |
662 | where you want to be, but only because it will benefit your design. It is not | |
663 | true that one should always try to incorporate as many design patterns as | |
664 | possible into a program. It is not like they have intrinsic value. They only add | |
665 | value to a system when they support its design. Otherwise, the use of design | |
666 | patterns may only lead to a program that is more complex than necessary. | |
d1adbeef EK |
667 | |
668 | \begin{quote} | |
669 | The overuse of patterns tends to result from being patterns happy. We are | |
670 | \emph{patterns happy} when we become so enamored of patterns that we simply | |
671 | must use them in our code.~\cite[p.~24]{kerievsky2005} | |
672 | \end{quote} | |
673 | ||
674 | This can easily happen when relying largely on up-front design. Then it is | |
675 | natural, in the very beginning, to try to build in all the flexibility that one | |
676 | believes will be necessary throughout the lifetime of a software system. | |
677 | According to Joshua Kerievsky ``That sounds reasonable --- if you happen to be | |
678 | psychic.''~\cite[p.~1]{kerievsky2005} He is advocating what he believes is a | |
679 | better approach: To let software continually evolve. To start with a simple | |
680 | design that meets today's needs, and tackle future needs by refactoring to | |
681 | satisfy them. He believes that this is a more economic approach than investing | |
682 | time and money into a design that inevitably is going to change. By relying on | |
683 | continuously refactoring a system, its design can be made simpler without | |
684 | sacrificing flexibility. To be able to fully rely on this approach, it is of | |
e123ab03 | 685 | utter importance to have a reliable suit of tests to lean on \see{testing}. This |
d1adbeef EK |
686 | makes the design process more natural and less characterized by difficult |
687 | decisions that has to be made before proceeding in the process, and that is | |
688 | going to define a project for all of its unforeseeable future. | |
689 | ||
b4d90424 | 690 | \subsection{The impact on software quality} |
00aa0588 | 691 | |
b4d90424 | 692 | \subsubsection{What is software quality?} |
00aa0588 | 693 | The term \emph{software quality} has many meanings. It all depends on the |
9a55a5bc | 694 | context we put it in. If we look at it with the eyes of a software developer, it |
a1bafe90 | 695 | usually means that the software is easily maintainable and testable, or in other |
9a55a5bc EK |
696 | words, that it is \emph{well designed}. This often correlates with the |
697 | management scale, where \emph{keeping the schedule} and \emph{customer | |
137e0e7b EK |
698 | satisfaction} is at the center. From the customers point of view, in addition to |
699 | good usability, \emph{performance} and \emph{lack of bugs} is always | |
700 | appreciated, measurements that are also shared by the software developer. (In | |
701 | addition, such things as good documentation could be measured, but this is out | |
702 | of the scope of this document.) | |
9a55a5bc | 703 | |
b4d90424 | 704 | \subsubsection{The impact on performance} |
9a55a5bc | 705 | \begin{quote} |
a1bafe90 EK |
706 | Refactoring certainly will make software go more slowly\footnote{With todays |
707 | compiler optimization techniques and performance tuning of e.g. the Java | |
708 | virtual machine, the penalties of object creation and method calls are | |
709 | debatable.}, but it also makes the software more amenable to performance | |
710 | tuning.~\cite[p.~69]{refactoring} | |
9a55a5bc | 711 | \end{quote} |
ee45c41f EK |
712 | |
713 | \noindent There is a common belief that refactoring compromises performance, due | |
714 | to increased degree of indirection and that polymorphism is slower than | |
9a55a5bc EK |
715 | conditionals. |
716 | ||
b5c7bb1b | 717 | In a survey, Demeyer\citing{demeyer2002} disproves this view in the case of |
a1bafe90 | 718 | polymorphism. He did an experiment on, what he calls, ``Transform Self Type |
9a55a5bc EK |
719 | Checks'' where you introduce a new polymorphic method and a new class hierarchy |
720 | to get rid of a class' type checking of a ``type attribute``. He uses this kind | |
721 | of transformation to represent other ways of replacing conditionals with | |
722 | polymorphism as well. The experiment is performed on the C++ programming | |
a1bafe90 EK |
723 | language and with three different compilers and platforms. Demeyer concludes |
724 | that, with compiler optimization turned on, polymorphism beats middle to large | |
725 | sized if-statements and does as well as case-statements. (In accordance with | |
726 | his hypothesis, due to similarities between the way C++ handles polymorphism and | |
727 | case-statements.) | |
ee45c41f | 728 | |
9a55a5bc EK |
729 | \begin{quote} |
730 | The interesting thing about performance is that if you analyze most programs, | |
b5c7bb1b | 731 | you find that they waste most of their time in a small fraction of the |
4cb06723 | 732 | code.~\cite[p.~70]{refactoring} |
9a55a5bc | 733 | \end{quote} |
9a55a5bc | 734 | |
ee45c41f EK |
735 | \noindent So, although an increased amount of method calls could potentially |
736 | slow down programs, one should avoid premature optimization and sacrificing good | |
fe0a4c48 EK |
737 | design, leaving the performance tuning until after \gloss{profiling} the |
738 | software and having isolated the actual problem areas. | |
00aa0588 | 739 | |
b4d90424 | 740 | \subsection{Composite refactorings}\label{compositeRefactorings} |
6065c96c | 741 | Generally, when thinking about refactoring, at the mechanical level, there are |
f65da046 | 742 | essentially two kinds of refactorings. There are the \emph{primitive} |
a1bafe90 | 743 | refactorings, and the \emph{composite} refactorings. |
6065c96c | 744 | |
6c51af15 EK |
745 | \definition{A \emph{primitive refactoring} is a refactoring that cannot be |
746 | expressed in terms of other refactorings.} | |
f65da046 | 747 | |
fe0a4c48 | 748 | \noindent Examples are the \refa{Pull Up Field} and \refa{Pull Up |
a1bafe90 | 749 | Method} refactorings\citing{refactoring}, that move members up in their class |
ee45c41f EK |
750 | hierarchies. |
751 | ||
6c51af15 EK |
752 | \definition{A \emph{composite refactoring} is a refactoring that can be |
753 | expressed in terms of two or more other refactorings.} | |
f65da046 | 754 | |
fe0a4c48 | 755 | \noindent An example of a composite refactoring is the \refa{Extract |
b5c7bb1b EK |
756 | Superclass} refactoring\citing{refactoring}. In its simplest form, it is composed |
757 | of the previously described primitive refactorings, in addition to the | |
fe0a4c48 | 758 | \refa{Pull Up Constructor Body} refactoring\citing{refactoring}. It works |
b5c7bb1b | 759 | by creating an abstract superclass that the target class(es) inherits from, then |
fe0a4c48 EK |
760 | by applying \refa{Pull Up Field}, \refa{Pull Up Method} and |
761 | \refa{Pull Up Constructor Body} on the members that are to be members of | |
f5fb40e4 EK |
762 | the new superclass. If there are multiple classes in play, their interfaces may |
763 | need to be united with the help of some rename refactorings, before extracting | |
fe0a4c48 | 764 | the superclass. For an overview of the \refa{Extract Superclass} |
8b6b22c8 | 765 | refactoring, see \myref{fig:extractSuperclass}. |
6065c96c | 766 | |
ddcea0b5 EK |
767 | \begin{figure}[h] |
768 | \centering | |
faa9f4f3 | 769 | \includegraphics[angle=270,width=\linewidth]{extractSuperclassItalic.pdf} |
f5fb40e4 | 770 | \caption{The Extract Superclass refactoring, with united interfaces.} |
ddcea0b5 EK |
771 | \label{fig:extractSuperclass} |
772 | \end{figure} | |
6065c96c | 773 | |
b4d90424 | 774 | \subsection{Manual vs. automated refactorings} |
0d7fbd88 | 775 | Refactoring is something every programmer does, even if \heshe does not known |
f65da046 EK |
776 | the term \emph{refactoring}. Every refinement of source code that does not alter |
777 | the program's behavior is a refactoring. For small refactorings, such as | |
0d7fbd88 | 778 | \ExtractMethod, executing it manually is a manageable task, but is still prone |
a1bafe90 | 779 | to errors. Getting it right the first time is not easy, considering the method |
f65da046 EK |
780 | signature and all the other aspects of the refactoring that has to be in place. |
781 | ||
f5fb40e4 EK |
782 | Consider the renaming of classes, methods and fields. For complex programs these |
783 | refactorings are almost impossible to get right. Attacking them with textual | |
784 | search and replace, or even regular expressions, will fall short on these tasks. | |
785 | Then it is crucial to have proper tool support that can perform them | |
786 | automatically. Tools that can parse source code and thus have semantic knowledge | |
787 | about which occurrences of which names belong to what construct in the program. | |
788 | For even trying to perform one of these complex task manually, one would have to | |
789 | be very confident on the existing test suite \see{testing}. | |
00aa0588 | 790 | |
b4d90424 | 791 | \subsection{Correctness of refactorings}\label{correctness} |
f65da046 | 792 | For automated refactorings to be truly useful, they must show a high degree of |
f5fb40e4 EK |
793 | behavior preservation. This last sentence might seem obvious, but there are |
794 | examples of refactorings in existing tools that break programs. In an ideal | |
795 | world, every automated refactoring would be ``complete'', in the sense that it | |
796 | would never break a program. In an ideal world, every program would also be free | |
797 | from bugs. In modern IDEs the implemented automated refactorings are working for | |
798 | \emph{most} cases, that is enough for making them useful. | |
799 | ||
800 | I will now present an example of a \emph{corner case} where a program breaks | |
801 | when a refactoring is applied. The example shows an \ExtractMethod refactoring | |
802 | followed by a \MoveMethod refactoring that breaks a program in both the | |
fe0a4c48 | 803 | \name{Eclipse} and \name{IntelliJ} IDEs\footnote{The \name{NetBeans} IDE handles this |
3ab3e132 | 804 | particular situation without altering the program's behavior, mainly because |
fe0a4c48 | 805 | its \refa{Move Method} refactoring implementation is a bit flawed in other ways |
f5fb40e4 EK |
806 | \see{toolSupport}.}. The target and the destination for the composed |
807 | refactoring is shown in \myref{lst:correctnessExtractAndMove}. Note that the | |
808 | method \method{m(C c)} of class \type{X} assigns to the field \var{x} of the | |
809 | argument \var{c} that has type \type{C}. | |
810 | ||
811 | \begin{listing}[h] | |
812 | \begin{multicols}{2} | |
53be7239 EK |
813 | \begin{minted}[linenos,frame=topline,label={Refactoring |
814 | target},framesep=\mintedframesep]{java} | |
ddcea0b5 | 815 | public class C { |
f5fb40e4 | 816 | public X x = new X(); |
ee45c41f | 817 | |
f5fb40e4 EK |
818 | public void f() { |
819 | x.m(this); | |
820 | // Not the same x | |
821 | x.n(); | |
822 | } | |
ddcea0b5 EK |
823 | } |
824 | \end{minted} | |
ee45c41f | 825 | |
f5fb40e4 | 826 | \columnbreak |
ee45c41f | 827 | |
53be7239 EK |
828 | \begin{minted}[frame=topline,label={Method |
829 | destination},framesep=\mintedframesep]{java} | |
ee45c41f | 830 | public class X { |
f5fb40e4 EK |
831 | public void m(C c) { |
832 | c.x = new X(); | |
833 | // If m is called from | |
834 | // c, then c.x no longer | |
835 | // equals 'this' | |
836 | } | |
837 | public void n() {} | |
ee45c41f EK |
838 | } |
839 | \end{minted} | |
f5fb40e4 EK |
840 | \end{multicols} |
841 | \caption{The target and the destination for the composition of the Extract | |
fe0a4c48 | 842 | Method and \refa{Move Method} refactorings.} |
f5fb40e4 EK |
843 | \label{lst:correctnessExtractAndMove} |
844 | \end{listing} | |
ee45c41f | 845 | |
f5fb40e4 EK |
846 | |
847 | The refactoring sequence works by extracting line 6 through 8 from the original | |
3510e539 | 848 | class \type{C} into a method \method{f} with the statements from those lines as |
f5fb40e4 EK |
849 | its method body (but with the comment left out, since it will no longer hold any |
850 | meaning). The method is then moved to the class \type{X}. The result is shown | |
851 | in \myref{lst:correctnessExtractAndMoveResult}. | |
ee45c41f | 852 | |
f5fb40e4 EK |
853 | Before the refactoring, the methods \method{m} and \method{n} of class \type{X} |
854 | are called on different object instances (see line 6 and 8 of the original class | |
855 | \type{C} in \cref{lst:correctnessExtractAndMove}). After the refactoring, they | |
856 | are called on the same object, and the statement on line | |
857 | 3 of class \type{X} (in \cref{lst:correctnessExtractAndMoveResult}) no longer | |
858 | has the desired effect in our example. The method \method{f} of class \type{C} | |
859 | is now calling the method \method{f} of class \type{X} (see line 5 of class | |
860 | \type{C} in \cref{lst:correctnessExtractAndMoveResult}), and the program now | |
861 | behaves different than before. | |
862 | ||
863 | \begin{listing}[h] | |
864 | \begin{multicols}{2} | |
865 | \begin{minted}[linenos]{java} | |
ee45c41f EK |
866 | public class C { |
867 | public X x = new X(); | |
868 | ||
869 | public void f() { | |
870 | x.f(this); | |
871 | } | |
872 | } | |
873 | \end{minted} | |
874 | ||
f5fb40e4 EK |
875 | \columnbreak |
876 | ||
877 | \begin{minted}[linenos]{java} | |
ee45c41f EK |
878 | public class X { |
879 | public void m(C c) { | |
880 | c.x = new X(); | |
881 | } | |
882 | public void n() {} | |
f5fb40e4 EK |
883 | // Extracted and |
884 | // moved method | |
ee45c41f EK |
885 | public void f(C c) { |
886 | m(c); | |
887 | n(); | |
888 | } | |
889 | } | |
890 | \end{minted} | |
f5fb40e4 EK |
891 | \end{multicols} |
892 | \caption{The result of the composed refactoring.} | |
893 | \label{lst:correctnessExtractAndMoveResult} | |
894 | \end{listing} | |
ddcea0b5 | 895 | |
aa1e3779 EK |
896 | The bug introduced in the previous example is of such a nature\footnote{Caused |
897 | by aliasing. See \url{https://en.wikipedia.org/wiki/Aliasing_(computing)}} | |
898 | that it is very difficult to spot if the refactored code is not covered by | |
899 | tests. It does not generate compilation errors, and will thus only result in | |
900 | a runtime error or corrupted data, which might be hard to detect. | |
19c4f27d | 901 | |
b4d90424 | 902 | \subsection{Refactoring and the importance of testing}\label{testing} |
19c4f27d EK |
903 | \begin{quote} |
904 | If you want to refactor, the essential precondition is having solid | |
905 | tests.\citing{refactoring} | |
906 | \end{quote} | |
907 | ||
29f39f29 EK |
908 | When refactoring, there are roughly three classes of errors that can be made. |
909 | The first class of errors are the ones that make the code unable to compile. | |
910 | These \emph{compile-time} errors are of the nicer kind. They flash up at the | |
911 | moment they are made (at least when using an IDE), and are usually easy to fix. | |
912 | The second class are the \emph{runtime} errors. Although they take a bit longer | |
913 | to surface, they usually manifest after some time in an illegal argument | |
914 | exception, null pointer exception or similar during the program execution. | |
915 | These kind of errors are a bit harder to handle, but at least they will show, | |
916 | eventually. Then there are the \emph{behavior-changing} errors. These errors are | |
917 | of the worst kind. They do not show up during compilation and they do not turn | |
918 | on a blinking red light during runtime either. The program can seem to work | |
919 | perfectly fine with them in play, but the business logic can be damaged in ways | |
920 | that will only show up over time. | |
921 | ||
922 | For discovering runtime errors and behavior changes when refactoring, it is | |
923 | essential to have good test coverage. Testing in this context means writing | |
924 | automated tests. Manual testing may have its uses, but when refactoring, it is | |
925 | automated unit testing that dominate. For discovering behavior changes it is | |
926 | especially important to have tests that cover potential problems, since these | |
927 | kind of errors does not reveal themselves. | |
928 | ||
929 | Unit testing is not a way to \emph{prove} that a program is correct, but it is a | |
3ab3e132 | 930 | way to make you confident that it \emph{probably} works as desired. In the |
4928aa0b | 931 | context of test-driven development (commonly known as TDD), the tests are even a |
29f39f29 EK |
932 | way to define how the program is \emph{supposed} to work. It is then, by |
933 | definition, working if the tests are passing. | |
19c4f27d EK |
934 | |
935 | If the test coverage for a code base is perfect, then it should, theoretically, | |
29f39f29 EK |
936 | be risk-free to perform refactorings on it. This is why automated tests and |
937 | refactoring are such a great match. | |
f65da046 | 938 | |
b4d90424 | 939 | \subsubsection{Testing the code from correctness section} |
29f39f29 EK |
940 | The worst thing that can happen when refactoring is to introduce changes to the |
941 | behavior of a program, as in the example on \myref{correctness}. This example | |
942 | may be trivial, but the essence is clear. The only problem with the example is | |
943 | that it is not clear how to create automated tests for it, without changing it | |
944 | in intrusive ways. | |
945 | ||
20bcc7bf | 946 | Unit tests, as they are known from the different \glosspl{xUnit} around, are |
29f39f29 EK |
947 | only suitable to test the \emph{result} of isolated operations. They can not |
948 | easily (if at all) observe the \emph{history} of a program. | |
b5d53f51 | 949 | |
a13e5650 | 950 | This problem is still open. |
116805bf | 951 | |
a13e5650 | 952 | \begin{comment} |
116805bf EK |
953 | |
954 | Assuming a sequential (non-concurrent) program: | |
955 | ||
956 | \begin{minted}{java} | |
957 | tracematch (C c, X x) { | |
958 | sym m before: | |
959 | call(* X.m(C)) && args(c) && cflow(within(C)); | |
960 | sym n before: | |
961 | call(* X.n()) && target(x) && cflow(within(C)); | |
962 | sym setCx after: | |
963 | set(C.x) && target(c) && !cflow(m); | |
964 | ||
965 | m n | |
966 | ||
967 | { assert x == c.x; } | |
968 | } | |
969 | \end{minted} | |
970 | ||
971 | %\begin{minted}{java} | |
972 | %tracematch (X x1, X x2) { | |
973 | % sym m before: | |
974 | % call(* X.m(C)) && target(x1); | |
975 | % sym n before: | |
976 | % call(* X.n()) && target(x2); | |
977 | % sym setX after: | |
978 | % set(C.x) && !cflow(m) && !cflow(n); | |
979 | % | |
980 | % m n | |
981 | % | |
982 | % { assert x1 != x2; } | |
983 | %} | |
984 | %\end{minted} | |
a13e5650 | 985 | \end{comment} |
116805bf | 986 | |
222d172b | 987 | |
b4d90424 | 988 | \section{The Project} |
222d172b | 989 | |
b4d90424 | 990 | \subsection{Project description} |
60065669 | 991 | The aim of this master's project will be to explore the relationship between the |
0fe1a611 EK |
992 | \ExtractMethod and the \MoveMethod refactorings. This will be done by composing |
993 | the two into a composite refactoring. The refactoring will be called the | |
6a779665 EK |
994 | \ExtractAndMoveMethod refactoring. |
995 | ||
996 | The two primitive \ExtractMethod and \MoveMethod refactorings must already be | |
997 | implemented in a tool, so the \ExtractAndMoveMethod refactoring is going to be | |
998 | built on top of those. | |
b5d53f51 | 999 | |
a13e5650 EK |
1000 | The composition of the \ExtractMethod and \MoveMethod refactorings springs |
1001 | naturally out of the need to move procedures closer to the data they manipulate. | |
1002 | This composed refactoring is not well described in the literature, but it is | |
1003 | implemented in at least one tool called | |
fe0a4c48 EK |
1004 | \name{CodeRush}\footnote{\url{https://help.devexpress.com/\#CodeRush/CustomDocument3519}}, |
1005 | that is an extension for \name{MS Visual | |
b5d53f51 | 1006 | Studio}\footnote{\url{http://www.visualstudio.com/}}. In CodeRush it is called |
fe0a4c48 | 1007 | \refa{Extract Method to |
b5d53f51 | 1008 | Type}\footnote{\url{https://help.devexpress.com/\#CodeRush/CustomDocument6710}}, |
0e6e57d3 | 1009 | but I choose to call it \ExtractAndMoveMethod, since I feel this better |
b5d53f51 EK |
1010 | communicates which primitive refactorings it is composed of. |
1011 | ||
0fe1a611 | 1012 | The project will consist of implementing the \ExtractAndMoveMethod refactoring, |
0e6e57d3 | 1013 | as well as executing it over a larger code base, as a case study. To be able to |
0fe1a611 | 1014 | execute the refactoring automatically, I have to make it analyze code to |
0e6e57d3 | 1015 | determine the best selections to extract into new methods. |
70905ddc | 1016 | |
b4d90424 | 1017 | \subsection{The primitive refactorings} |
021508ad EK |
1018 | The refactorings presented here are the primitive refactorings used in this |
1019 | project. They are the abstract building blocks used by the \ExtractAndMoveMethod | |
1020 | refactoring. | |
04e21f15 | 1021 | |
b4d90424 | 1022 | \paragraph{The Extract Method refactoring} |
04e21f15 EK |
1023 | The \refa{Extract Method} refactoring is used to extract a fragment of code |
1024 | from its context and into a new method. A call to the new method is inlined | |
d516ac0b EK |
1025 | where the fragment was before. It is used to break code into logical units, with |
1026 | names that explain their purpose. | |
1027 | ||
1028 | An example of an \ExtractMethod refactoring is shown in | |
1029 | \myref{lst:extractMethodRefactoring}. It shows a method containing calls to the | |
1030 | methods \method{foo} and \method{bar} of a type \type{X}. These statements are | |
1031 | then extracted into the new method \method{fooBar}. | |
d516ac0b | 1032 | |
021508ad | 1033 | \begin{listing}[h] |
d516ac0b | 1034 | \begin{multicols}{2} |
53be7239 | 1035 | \begin{minted}[samepage,frame=topline,label={Before},framesep=\mintedframesep]{java} |
d516ac0b EK |
1036 | class C { |
1037 | void method() { | |
1038 | X x = new X(); | |
1039 | x.foo(); x.bar(); | |
1040 | } | |
1041 | } | |
1042 | \end{minted} | |
1043 | ||
1044 | \columnbreak | |
1045 | ||
53be7239 | 1046 | \begin{minted}[samepage,frame=topline,label={After},framesep=\mintedframesep]{java} |
d516ac0b EK |
1047 | class C { |
1048 | void method() { | |
1049 | X x = new X(); | |
1050 | fooBar(x); | |
1051 | } | |
d516ac0b EK |
1052 | void fooBar(X x) { |
1053 | x.foo(); x.bar(); | |
1054 | } | |
1055 | } | |
1056 | \end{minted} | |
1057 | \end{multicols} | |
1058 | \caption{An example of an \ExtractMethod refactoring.} | |
1059 | \label{lst:extractMethodRefactoring} | |
1060 | \end{listing} | |
04e21f15 | 1061 | |
b4d90424 | 1062 | \paragraph{The Move Method refactoring} |
04e21f15 | 1063 | The \refa{Move Method} refactoring is used to move a method from one class to |
4306ef44 EK |
1064 | another. This can be appropriate if the method is using more features of another |
1065 | class than of the class which it is currently defined. | |
1066 | ||
1067 | \Myref{lst:moveMethodRefactoring} shows an example of this refactoring. Here a | |
1068 | method \method{fooBar} is moved from the class \type{C} to the class \type{X}. | |
1069 | ||
021508ad | 1070 | \begin{listing}[h] |
4306ef44 | 1071 | \begin{multicols}{2} |
53be7239 | 1072 | \begin{minted}[samepage,frame=topline,label={Before},framesep=\mintedframesep]{java} |
4306ef44 EK |
1073 | class C { |
1074 | void method() { | |
1075 | X x = new X(); | |
1076 | fooBar(x); | |
1077 | } | |
1078 | void fooBar(X x) { | |
1079 | x.foo(); x.bar(); | |
1080 | } | |
1081 | } | |
1082 | ||
1083 | class X { | |
1084 | void foo(){/*...*/} | |
1085 | void bar(){/*...*/} | |
1086 | } | |
1087 | \end{minted} | |
1088 | ||
1089 | \columnbreak | |
1090 | ||
53be7239 | 1091 | \begin{minted}[samepage,frame=topline,label={After},framesep=\mintedframesep]{java} |
4306ef44 EK |
1092 | class C { |
1093 | void method() { | |
1094 | X x = new X(); | |
1095 | x.fooBar(); | |
1096 | } | |
1097 | } | |
1098 | ||
1099 | class X { | |
1100 | void fooBar() { | |
1101 | foo(); bar(); | |
1102 | } | |
1103 | void foo(){/*...*/} | |
1104 | void bar(){/*...*/} | |
1105 | } | |
1106 | \end{minted} | |
1107 | \end{multicols} | |
1108 | \caption{An example of a \MoveMethod refactoring.} | |
1109 | \label{lst:moveMethodRefactoring} | |
1110 | \end{listing} | |
04e21f15 | 1111 | |
b4d90424 | 1112 | \subsection{The Extract and Move Method refactoring} |
021508ad EK |
1113 | The \ExtractAndMoveMethod refactoring is a composite refactoring composed of the |
1114 | primitive \ExtractMethod and \MoveMethod refactorings. The effect of this | |
1115 | refactoring on source code is the same as when extracting a method and moving it | |
e36eade0 | 1116 | to another class. Conceptually, this is done without an intermediate step. In |
021508ad EK |
1117 | practice, as we shall see later, an intermediate step may be necessary. |
1118 | ||
1119 | An example of this composite refactoring is shown in | |
1120 | \myref{lst:extractAndMoveMethodRefactoring}. The example joins the examples from | |
1121 | \cref{lst:extractMethodRefactoring} and \cref{lst:moveMethodRefactoring}. This | |
1122 | means that the selection consisting of the consecutive calls to the methods | |
1123 | \method{foo} and \method{bar}, is extracted into a new method \method{fooBar} | |
1124 | located in the class \type{X}. | |
1125 | ||
1126 | \begin{listing}[h] | |
1127 | \begin{multicols}{2} | |
53be7239 | 1128 | \begin{minted}[samepage,frame=topline,label={Before},framesep=\mintedframesep]{java} |
021508ad EK |
1129 | class C { |
1130 | void method() { | |
1131 | X x = new X(); | |
1132 | x.foo(); x.bar(); | |
1133 | } | |
1134 | } | |
1135 | ||
1136 | class X { | |
1137 | void foo(){/*...*/} | |
1138 | void bar(){/*...*/} | |
1139 | } | |
1140 | \end{minted} | |
1141 | ||
1142 | \columnbreak | |
1143 | ||
53be7239 | 1144 | \begin{minted}[samepage,frame=topline,label={After},framesep=\mintedframesep]{java} |
021508ad EK |
1145 | class C { |
1146 | void method() { | |
1147 | X x = new X(); | |
1148 | x.fooBar(); | |
1149 | } | |
1150 | } | |
04e21f15 | 1151 | |
021508ad EK |
1152 | class X { |
1153 | void fooBar() { | |
1154 | foo(); bar(); | |
1155 | } | |
1156 | void foo(){/*...*/} | |
1157 | void bar(){/*...*/} | |
1158 | } | |
1159 | \end{minted} | |
1160 | \end{multicols} | |
1161 | \caption{An example of the \ExtractAndMoveMethod refactoring.} | |
1162 | \label{lst:extractAndMoveMethodRefactoring} | |
1163 | \end{listing} | |
04e21f15 | 1164 | |
b4d90424 | 1165 | \subsection{Research questions} |
0fe1a611 EK |
1166 | The main question that I seek an answer to in this thesis is: |
1167 | ||
1168 | \begin{quote} | |
1169 | Is it possible to automate the analysis and execution of the | |
1170 | \ExtractAndMoveMethod refactoring, and do so for all of the code of a larger | |
1171 | project? | |
1172 | \end{quote} | |
1173 | ||
1174 | \noindent The secondary questions will then be: | |
1175 | ||
1176 | \paragraph{Can we do this efficiently?} Can we automate the analysis and | |
1177 | execution of the refactoring so it can be run in a reasonable amount of time? | |
1178 | And what does \emph{reasonable} mean in this context? | |
1179 | ||
1180 | And, assuming the refactoring does in fact improve the quality of source code: | |
1181 | ||
1182 | \paragraph{How can the automation of the refactoring be helpful?} What is the | |
e36eade0 | 1183 | usefulness of the refactoring in a software development setting? In what parts |
0fe1a611 EK |
1184 | of the development process can the refactoring play a role? |
1185 | ||
b4d90424 | 1186 | \subsection{The premises} |
0cbeeadb | 1187 | \todoin{Appropriate name?} |
b4d90424 EK |
1188 | |
1189 | \subsubsection{Choosing the target language} | |
70905ddc EK |
1190 | Choosing which programming language the code that shall be manipulated shall be |
1191 | written in, is not a very difficult task. We choose to limit the possible | |
1192 | languages to the object-oriented programming languages, since most of the | |
1193 | terminology and literature regarding refactoring comes from the world of | |
1194 | object-oriented programming. In addition, the language must have existing tool | |
1195 | support for refactoring. | |
1196 | ||
fe0a4c48 | 1197 | The \name{Java} programming language\footnote{\url{https://www.java.com/}} is |
70905ddc EK |
1198 | the dominating language when it comes to example code in the literature of |
1199 | refactoring, and is thus a natural choice. Java is perhaps, currently the most | |
fe0a4c48 | 1200 | influential programming language in the world, with its \name{Java Virtual |
70905ddc | 1201 | Machine} that runs on all of the most popular architectures and also supports |
e36eade0 | 1202 | dozens of other programming languages\footnote{They compile to Java bytecode.}, |
fe0a4c48 | 1203 | with \name{Scala}, \name{Clojure} and \name{Groovy} as the most prominent ones. |
70905ddc EK |
1204 | Java is currently the language that every other programming language is compared |
1205 | against. It is also the primary programming language for the author of this | |
1206 | thesis. | |
3f929fcc | 1207 | |
b4d90424 | 1208 | \subsubsection{Choosing the tools} |
3ab3e132 | 1209 | When choosing a tool for manipulating Java, there are certain criteria that |
3f929fcc EK |
1210 | have to be met. First of all, the tool should have some existing refactoring |
1211 | support that this thesis can build upon. Secondly it should provide some kind of | |
1212 | framework for parsing and analyzing Java source code. Third, it should itself be | |
1213 | open source. This is both because of the need to be able to browse the code for | |
1214 | the existing refactorings that is contained in the tool, and also because open | |
1215 | source projects hold value in them selves. Another important aspect to consider | |
1216 | is that open source projects of a certain size, usually has large communities of | |
3ab3e132 EK |
1217 | people connected to them, that are committed to answering questions regarding the |
1218 | use and misuse of the products, that to a large degree is made by the community | |
3f929fcc EK |
1219 | itself. |
1220 | ||
3ab3e132 | 1221 | There is a certain class of tools that meet these criteria, namely the class of |
3f929fcc | 1222 | \emph{IDEs}\footnote{\emph{Integrated Development Environment}}. These are |
3ab3e132 | 1223 | programs that is meant to support the whole production cycle of a computer |
3f929fcc EK |
1224 | program, and the most popular IDEs that support Java, generally have quite good |
1225 | refactoring support. | |
1226 | ||
fe0a4c48 EK |
1227 | The main contenders for this thesis is the \name{Eclipse IDE}, with the |
1228 | \name{Java development tools} (JDT), the \name{IntelliJ IDEA Community Edition} | |
1229 | and the \name{NetBeans IDE} \see{toolSupport}. \name{Eclipse} and | |
1230 | \name{NetBeans} are both free, open source and community driven, while the | |
1231 | \name{IntelliJ IDEA} has an open sourced community edition that is free of | |
1232 | charge, but also offer an \name{Ultimate Edition} with an extended set of | |
1233 | features, at additional cost. All three IDEs supports adding plugins to extend | |
1234 | their functionality and tools that can be used to parse and analyze Java source | |
1235 | code. But one of the IDEs stand out as a favorite, and that is the \name{Eclipse | |
1236 | IDE}. This is the most popular\citing{javaReport2011} among them and seems to be | |
1237 | de facto standard IDE for Java development regardless of platform. | |
4e135659 | 1238 | |
a5317dcf | 1239 | |
aceb848c | 1240 | \chapter{The search-based Extract and Move Method refactoring} |
9503a520 EK |
1241 | In this chapter I will delve into the workings of the search-based |
1242 | \ExtractAndMoveMethod refactoring. We will see the choices it must make along | |
1243 | the way and why it chooses a text selection as a candidate for refactoring or | |
1244 | not. | |
1245 | ||
e36eade0 | 1246 | After defining some concepts, I will introduce an example that will be used |
9503a520 EK |
1247 | throughout the chapter to illustrate how the refactoring works in some simple |
1248 | situations. | |
c894b297 EK |
1249 | |
1250 | \section{The inputs to the refactoring} | |
1251 | For executing an \ExtractAndMoveMethod refactoring, there are two simple | |
f5355077 | 1252 | requirements. The first thing the refactoring needs is a text selection, telling |
c894b297 EK |
1253 | it what to extract. Its second requirement is a target for the subsequent move |
1254 | operation. | |
1255 | ||
1256 | The extracted method must be called instead of the selection that makes up its | |
1257 | body. Also, the method call has to be performed via a variable, since the method | |
aceb848c EK |
1258 | is not static. Therefore, the move target must be a variable in the scope of the |
1259 | extracted selection. The actual new location for the extracted method will be | |
1260 | the class representing the type of the move target variable. But, since the | |
1261 | method also must be called through a variable, it makes sense to define the move | |
1262 | target to be either a local variable or a field in the scope of the text | |
1263 | selection. | |
c894b297 | 1264 | |
53be7239 EK |
1265 | \section{Defining a text selection} |
1266 | A text selection, in our context, is very similar to what you think of when | |
1267 | selecting a bit of text in your editor or other text processing tool with your | |
1268 | mouse or keyboard. It is an abstract construct that is meant to capture which | |
1269 | specific portion of text we are about to deal with. | |
1270 | ||
1271 | To be able to clearly reason about a text selection done to a portion of text in | |
1272 | a computer file, that consist of pure text, we put up the following definition. | |
1273 | ||
1274 | \definition{A \emph{text selection} in a text file is defined by two | |
1275 | non-negative integers, in addition to a reference to the file itself. The first | |
1276 | integer is an offset into the file, while the second reference is the length of | |
1277 | the text selection.} | |
1278 | ||
1279 | This means that the selected text consist of a number of characters equal to the | |
1280 | length of the selection, where the first character is found at the specified | |
1281 | offset. | |
1282 | ||
b4d90424 EK |
1283 | \section{Where we look for text selections} |
1284 | ||
1285 | \subsection{Text selections are found in methods} | |
1286 | The text selections we are interested in are those that surrounds program | |
1287 | statements. Therefore, the place we look for selections that can form candidates | |
1288 | for an execution of the \ExtractAndMoveMethod refactoring, is within the body of | |
1289 | a single method. | |
1290 | ||
1291 | \paragraph{On ignoring static methods} | |
1292 | In this project we are not analyzing static methods for candidates to the | |
1293 | \ExtractAndMoveMethod refactoring. The reason for this is that in the cases | |
1294 | where we want to perform the refactoring for a selection within a static method, | |
1295 | the first step is to extract the selection into a new method. Hence this method | |
1296 | also become static, since it must be possible to call it from a static context. | |
1297 | It would then be difficult to move the method to another class, make it | |
1298 | non-static and calling it through a variable. To avoid these obstacles, we | |
1299 | simply ignore static methods. | |
1300 | ||
53be7239 EK |
1301 | \begin{listing}[htb] |
1302 | \def\charwidth{5.8pt} | |
9db7f4cf | 1303 | \def\indent{2*\charwidth} |
53be7239 EK |
1304 | \def\lineheight{\baselineskip} |
1305 | \def\mintedtop{2*\lineheight+5.8pt} | |
1306 | ||
1307 | \begin{tikzpicture}[overlay, yscale=-1, xshift=3.8pt+\charwidth*31] | |
1308 | \tikzstyle{overlaybox}=[fill=lightgray,opacity=0.2] | |
1309 | % Level 1 | |
1310 | \draw[overlaybox] (\indent,\mintedtop+\lineheight*4) rectangle | |
9db7f4cf | 1311 | +(23*\charwidth,17*\lineheight); |
53be7239 EK |
1312 | |
1313 | % Level 2 | |
1314 | \draw[overlaybox] (2*\indent,\mintedtop+5*\lineheight) rectangle | |
9db7f4cf | 1315 | +(15*\charwidth,3*\lineheight); |
53be7239 | 1316 | \draw[overlaybox] (2*\indent,\mintedtop+15*\lineheight) rectangle |
9db7f4cf | 1317 | +(15*\charwidth,3*\lineheight); |
53be7239 | 1318 | \draw[overlaybox] (2*\indent,\mintedtop+19*\lineheight) rectangle |
9db7f4cf | 1319 | +(15*\charwidth,\lineheight); |
53be7239 EK |
1320 | \end{tikzpicture} |
1321 | \begin{multicols}{2} | |
01d46361 | 1322 | \begin{minted}[linenos,frame=topline,label=Clean,framesep=\mintedframesep]{java} |
53be7239 | 1323 | class C { |
9db7f4cf EK |
1324 | A a; B b; boolean bool; |
1325 | ||
1326 | void method(int val) { | |
1327 | if (bool) { | |
1328 | a.foo(); | |
1329 | a = new A(); | |
1330 | a.bar(); | |
1331 | } | |
1332 | ||
1333 | a.foo(); | |
1334 | a.bar(); | |
1335 | ||
1336 | switch (val) { | |
1337 | case 1: | |
1338 | b.a.foo(); | |
1339 | b.a.bar(); | |
1340 | break; | |
1341 | default: | |
1342 | a.foo(); | |
53be7239 | 1343 | } |
9db7f4cf | 1344 | } |
53be7239 EK |
1345 | } |
1346 | \end{minted} | |
1347 | ||
1348 | \columnbreak | |
1349 | ||
9db7f4cf EK |
1350 | \begin{minted}[frame=topline,label={With statement |
1351 | sequences},framesep=\mintedframesep]{java} | |
53be7239 | 1352 | class C { |
9db7f4cf EK |
1353 | A a; B b; boolean bool; |
1354 | ||
1355 | void method(int val) { | |
1356 | if (bool) { | |
1357 | a.foo(); | |
1358 | a = new A(); | |
1359 | a.bar(); | |
53be7239 | 1360 | } |
9db7f4cf EK |
1361 | |
1362 | a.foo(); | |
1363 | a.bar(); | |
1364 | ||
1365 | switch (val) { | |
1366 | case 1: | |
1367 | b.a.foo(); | |
1368 | b.a.bar(); | |
1369 | break; | |
1370 | default: | |
1371 | a.foo(); | |
1372 | } | |
1373 | } | |
53be7239 EK |
1374 | } |
1375 | \end{minted} | |
1376 | ||
1377 | \end{multicols} | |
1378 | \caption{Classes \type{A} and \type{B} are both public. The methods | |
1379 | \method{foo} and \method{bar} are public members of class \type{A}.} | |
1380 | \label{lst:grandExample} | |
1381 | \end{listing} | |
1382 | ||
b4d90424 | 1383 | \subsection{The possible text selections of a method body} |
0cbeeadb | 1384 | \todoin{dummy todo} |
b4d90424 EK |
1385 | The number of possible text selections that can be made from the text in a |
1386 | method body, are equal to all the sub-sequences of characters within it. For our | |
f5355077 EK |
1387 | purposes, analyzing program source code, we must define what it means for a text |
1388 | selection to be valid. | |
53be7239 EK |
1389 | |
1390 | \definition{A \emph{valid text selection} is a text selection that contains all | |
1391 | of one or more consecutive program statements.} | |
1392 | ||
1393 | For a sequence of statements, the text selections that can be made from it, are | |
1394 | equal to all its sub-sequences. \Myref{lst:textSelectionsExample} show an | |
1395 | example of all the text selections that can be made from the code in | |
e36eade0 | 1396 | \myref{lst:grandExample}, lines 16-18. For convenience and the clarity of this |
01d46361 EK |
1397 | example, the text selections are represented as tuples with the start and end |
1398 | line of all selections: $\{(16), (17), (18), (16,17), (16,18), (17,18)\}$. | |
53be7239 EK |
1399 | |
1400 | \begin{listing}[htb] | |
1401 | \def\charwidth{5.7pt} | |
1402 | \def\indent{4*\charwidth} | |
1403 | \def\lineheight{\baselineskip} | |
01d46361 | 1404 | \def\mintedtop{\lineheight-1pt} |
53be7239 EK |
1405 | |
1406 | \begin{tikzpicture}[overlay, yscale=-1] | |
1407 | \tikzstyle{overlaybox}=[fill=lightgray,opacity=0.2] | |
1408 | ||
1409 | % First statement | |
1410 | \draw[overlaybox] (2*\charwidth,\mintedtop) rectangle | |
1411 | +(16*\charwidth,\lineheight); | |
1412 | ||
1413 | % Second statement | |
1414 | \draw[overlaybox] (2*\charwidth,\mintedtop+\lineheight) rectangle | |
1415 | +(16*\charwidth,\lineheight); | |
1416 | ||
1417 | % Third statement | |
1418 | \draw[overlaybox] (2*\charwidth,\mintedtop+2*\lineheight) rectangle | |
1419 | +(16*\charwidth,\lineheight); | |
1420 | ||
1421 | \draw[overlaybox] (\indent-3*\charwidth,\mintedtop) rectangle | |
1422 | +(18*\charwidth,2*\lineheight); | |
1423 | ||
1424 | \draw[overlaybox] (3*\charwidth,\mintedtop+\lineheight) rectangle | |
1425 | +(14*\charwidth,2*\lineheight); | |
1426 | ||
1427 | % All | |
1428 | \draw[overlaybox] (\indent,\mintedtop) rectangle | |
1429 | +(12*\charwidth,3*\lineheight); | |
1430 | \end{tikzpicture} | |
1431 | % indent should be 5 spaces | |
9503a520 | 1432 | \begin{minted}[linenos,firstnumber=16]{java} |
53be7239 EK |
1433 | b.a.foo(); |
1434 | b.a.bar(); | |
1435 | break; | |
1436 | \end{minted} | |
1437 | \caption{Example of how the text selections generator would generate text | |
1438 | selections based on a lists of statements. Each highlighted rectangle | |
1439 | represents a text selection.} | |
1440 | \label{lst:textSelectionsExample} | |
1441 | \end{listing} | |
1442 | ||
1443 | Each nesting level of a method body can have many such sequences of statements. | |
9503a520 | 1444 | The outermost nesting level has one such sequence, and each branch contains |
01d46361 EK |
1445 | their own sequence of statements. \Myref{lst:grandExample} has a version of some |
1446 | code where all such sequences of statements are highlighted for a method body. | |
47c0bea8 | 1447 | |
9503a520 EK |
1448 | To complete our example of possible text selections, I will now list all |
1449 | possible text selections for the method in \myref{lst:grandExample}, by nesting | |
f5355077 | 1450 | level. There are 23 of them in total. |
47c0bea8 | 1451 | |
9503a520 EK |
1452 | \begin{description} |
1453 | \item[Level 1 (10 selections)] \hfill \\ | |
1454 | $\{(5,9), (11), (12), (14,21), (5,11), (5,12), (5,21), (11,12), | |
1455 | (11,21), \\(12,21)\}$ | |
8de7cf3c | 1456 | |
9503a520 EK |
1457 | \item[Level 2 (13 selections)] \hfill \\ |
1458 | $\{(6), (7), (8), (6,7), (6,8), (7,8), (16), (17), (18), (16,17), (16,18), \\ | |
1459 | (17,18), (20)\}$ | |
1460 | \end{description} | |
c894b297 | 1461 | |
b4d90424 EK |
1462 | \subsubsection{The complexity}\label{sec:complexity} |
1463 | The complexity of how many text selections that needs to be analyzed for a body | |
1464 | of in total $n$ statements, is bounded by $O(n^2)$. A body of statements is here | |
1465 | all the statements in all nesting levels of a sequence of statements. A method | |
1466 | body (or a block) is a body of statements. To prove that the complexity is | |
1467 | bounded by $O(n^2)$, I present a couple of theorems and proves them. | |
1468 | ||
1469 | \begin{theorem} | |
1470 | The number of text selections that need to be analyzed for each list of | |
1471 | statements of length $n$, is exactly | |
1472 | ||
1473 | \begin{equation*} | |
1474 | \sum_{i=1}^{n} i = \frac{n(n+1)}{2} | |
1475 | \label{eq:complexityStatementList} | |
1476 | \end{equation*} | |
1477 | \label{thm:numberOfTextSelection} | |
1478 | \end{theorem} | |
1479 | ||
1480 | \begin{proof} | |
1481 | For $n=1$ this is trivial: $\frac{1(1+1)}{2} = \frac{2}{2} = 1$. One statement | |
1482 | equals one selection. | |
1483 | ||
1484 | For $n=2$, you get one text selection for the first statement, one selection | |
1485 | for the second statement, and one selection for the two of them combined. | |
1486 | This equals three selections. $\frac{2(2+1)}{2} = \frac{6}{2} = 3$. | |
1487 | ||
1488 | For $n=3$, you get 3 selections for the two first statements, as in the case | |
1489 | where $n=2$. In addition you get one selection for the third statement itself, | |
1490 | and two more statements for the combinations of it with the two previous | |
1491 | statements. This equals six selections. $\frac{3(3+1)}{2} = \frac{12}{2} = 6$. | |
1492 | ||
1493 | Assume that for $n=k$ there exists $\frac{k(k+1)}{2}$ text selections. Then we | |
1494 | want to add selections for another statement, following the previous $k$ | |
1495 | statements. So, for $n=k+1$, we get one additional selection for the statement | |
1496 | itself. Then we get one selection for each pair of the new selection and the | |
1497 | previous $k$ statements. So the total number of selections will be the number | |
1498 | of already generated selections, plus $k$ for every pair, plus one for the | |
1499 | statement itself: $\frac{k(k+1)}{2} + k + | |
1500 | 1 = \frac{k(k+1)+2k+2}{2} = \frac{k(k+1)+2(k+1)}{2} = \frac{(k+1)(k+2)}{2} = | |
1501 | \frac{(k+1)((k+1)+1)}{2} = \sum_{i=1}^{k+1} i$ | |
1502 | \end{proof} | |
1503 | ||
1504 | %\definition{A \emph{body of statements} is a sequence of statements where every | |
1505 | %statement may have sub-statements.} | |
1506 | ||
1507 | \begin{theorem} | |
1508 | The number of text selections for a body of statements is maximized if all the | |
1509 | statements are at the same level. | |
1510 | \label{thm:textSelectionsMaximized} | |
1511 | \end{theorem} | |
1512 | ||
1513 | \begin{proof} | |
1514 | Assume we have a body of, in total, $k$ statements. Then, the sum of the | |
1515 | lengths of all the lists of statements in the body, is also $k$. Let | |
1516 | $\{l,\ldots,m,(k-l-\ldots-m)\}$ be the lengths of the lists of statements in | |
1517 | the body, with $l+\ldots+m<k \Rightarrow \forall i \in \{l,\ldots,m\} : i < k$. | |
1518 | ||
1519 | Then, the number of text selections that are generated for the $k$ statements | |
1520 | is | |
1521 | ||
1522 | { | |
1523 | \small | |
1524 | \begin{align*} | |
1525 | \frac{l(l+1)}{2} + \ldots + \frac{m(m+1)}{2} + | |
1526 | \frac{(k-l-\ldots-m)((k-l-\ldots-m)+ 1)}{2} = \\ | |
1527 | \frac{l^2+l}{2} + \ldots + \frac{m^2+m}{2} + \frac{k^2 - 2kl - \ldots - 2km + | |
1528 | l^2 + \ldots + m^2 + k - l - \ldots - m}{2} = \\ | |
1529 | \frac{2l^2 - 2kl + \ldots + 2m^2 - 2km + k^2 + k}{2} | |
1530 | \end{align*} | |
1531 | } | |
1532 | ||
1533 | \noindent It then remains to show that this inequality holds: | |
1534 | ||
1535 | \begin{align*} | |
1536 | \frac{2l^2 - 2kl + \ldots + 2m^2 - 2km + k^2 + k}{2} < \frac{k(k+1)}{2} = | |
1537 | \frac{k^2 + k}{2} | |
1538 | \end{align*} | |
1539 | ||
1540 | \noindent By multiplication by $2$ on both sides, and by removing the equal | |
1541 | parts, we get | |
1542 | ||
1543 | \begin{align*} | |
1544 | 2l^2 - 2kl + \ldots + 2m^2 - 2km < 0 | |
1545 | \end{align*} | |
1546 | ||
1547 | Since $\forall i \in \{l,\ldots,m\} : i < k$, we have that $\forall i \in | |
1548 | \{l,\ldots,m\} : 2ki > 2i^2$, so all the pairs of parts on the form $2i^2-2ki$ | |
1549 | are negative. In sum, the inequality holds. | |
1550 | ||
1551 | \end{proof} | |
1552 | ||
1553 | Therefore, the complexity for the number of selections that needs to be analyzed | |
1554 | for a body of $n$ statements is $O\bigl(\frac{n(n+1)}{2}\bigr) = O(n^2)$. | |
1555 | ||
0cc6a67d | 1556 | \section{Disqualifying a selection} |
1c521a77 EK |
1557 | Certain text selections would lead to broken code if used as input to the |
1558 | \ExtractAndMoveMethod refactoring. To avoid this, we have to check all text | |
f5355077 EK |
1559 | selections for such conditions before they are further analyzed. This section |
1560 | is therefore going to present some properties that make a selection unsuitable | |
15327961 | 1561 | for our refactoring. |
1c521a77 EK |
1562 | |
1563 | \subsection{A call to a protected or package-private method} | |
21506154 EK |
1564 | If a text selection contains a call to a protected or package-private method, it |
1565 | would not be safe to move it to another class. The reason for this, is that we | |
1566 | cannot know if the called method is being overridden by some subclass of the | |
1567 | \gloss{enclosingClass}, or not. | |
1c521a77 EK |
1568 | |
1569 | Imagine that the protected method \method{foo} is declared in class \m{A}, | |
1570 | and overridden in class \m{B}. The method \method{foo} is called from within a | |
1571 | selection done to a method in \m{A}. We want to extract and move this selection | |
1572 | to another class. The method \method{foo} is not public, so the \MoveMethod | |
1573 | refactoring must make it public, making the extracted method able to call it | |
f5355077 | 1574 | from the extracted method's new location. The problem is, that the now public |
1c521a77 EK |
1575 | method \method{foo} is overridden in a subclass, where it has a protected |
1576 | status. This makes the compiler complain that the subclass \m{B} is trying to | |
1577 | reduce the visibility of a method declared in its superclass \m{A}. This is not | |
21506154 | 1578 | allowed in Java, and for good reasons. It would make it possible to make a |
1c521a77 | 1579 | subclass that could not be a substitute for its superclass. |
1c521a77 | 1580 | |
21506154 EK |
1581 | The problem this check helps to avoid, is a little subtle. The problem does not |
1582 | arise in the class where the change is done, but in a class derived from it. | |
1583 | This shows that classes acting as superclasses are especially fragile to | |
b4d90424 EK |
1584 | introducing errors in the context of automated refactoring. |
1585 | \begin{comment} | |
1586 | This is also shown in bug\ldots \todoin{File Eclipse bug report} | |
1587 | \end{comment} | |
21506154 | 1588 | |
1fceb439 EK |
1589 | \subsection{A double class instance creation} |
1590 | The following is a problem caused solely by the underlying \MoveMethod | |
1591 | refactoring. The problem occurs if two classes are instantiated such that the | |
1592 | first constructor invocation is an argument to a second, and that the first | |
1593 | constructor invocation takes an argument that is built up using a field. As an | |
1594 | example, say that \var{name} is a field of the enclosing class, and we have the | |
1595 | expression \code{new A(new B(name))}. If this expression is located in a | |
1596 | selection that is moved to another class, \var{name} will be left untouched, | |
1597 | instead of being prefixed with a variable of the same type as it is declared in. | |
1598 | If \var{name} is the destination for the move, it is not replaced by | |
1599 | \code{this}, or removed if it is a prefix to a member access | |
1600 | (\code{name.member}), but it is still left by itself. | |
1601 | ||
1602 | Situations like this would lead to code that will not compile. Therefore, we | |
1603 | have to avoid them by not allowing selections to contain such double class | |
1604 | instance creations that also contains references to fields. | |
b4d90424 | 1605 | \begin{comment} |
1fceb439 | 1606 | \todoin{File Eclipse bug report} |
b4d90424 | 1607 | \end{comment} |
1fceb439 | 1608 | |
1c521a77 EK |
1609 | \subsection{Instantiation of non-static inner class} |
1610 | When a non-static inner class is instantiated, this must happen in the scope of | |
1611 | its declaring class. This is because it must have access to the members of the | |
1612 | declaring class. If the inner class is public, it is possible to instantiate it | |
1613 | through an instance of its declaring class, but this is not handled by the | |
1614 | underlying \MoveMethod refactoring. | |
1615 | ||
1616 | Performing a move on a method that instantiates a non-static inner class, will | |
1617 | break the code if the instantiation is not handled properly. For this reason, | |
1618 | selections that contains instantiations of non-static inner classes are deemed | |
1619 | unsuitable for the \ExtractAndMoveMethod refactoring. | |
1620 | ||
1621 | \subsection{References to enclosing instances of the enclosing class} | |
1622 | The title of this section may be a little hard to grasp at first. What it means | |
1623 | is that there is a (non-static) class \m{C} that is declared in the scope of | |
98b06ec2 EK |
1624 | possibly multiple other classes. And there is a statement in the body of a |
1625 | method declared in class \m{C}, that contains a reference to one or more | |
1626 | instances of these enclosing classes of \m{C}. | |
1c521a77 EK |
1627 | |
1628 | The problem with this, is that these references may not be valid if they are | |
1629 | moved to another class. Theoretically, some situations could easily be solved by | |
1630 | passing, to the moved method, a reference to the instance where the problematic | |
98b06ec2 EK |
1631 | referenced member is declared. This should work in the case where this member is |
1632 | publicly accessible. This is not done in the underlying \MoveMethod refactoring, | |
1633 | so it cannot be allowed in the \ExtractAndMoveMethod refactoring either. | |
1c521a77 | 1634 | |
15327961 | 1635 | \subsection{Inconsistent return statements} |
cb903a13 EK |
1636 | To verify that a text selection is consistent with respect to return statements, |
1637 | we must check that if a selection contains a return statement, then every | |
1638 | possible execution path within the selection ends in either a return or a throw | |
15327961 EK |
1639 | statement. This property is important regarding the \ExtractMethod refactoring. |
1640 | If it holds, it means that a method could be extracted from the selection, and a | |
1641 | call to it could be substituted for the selection. If the method has a non-void | |
1642 | return type, then a call to it would also be a valid return point for the | |
1643 | calling method. If its return value is of the void type, then the \ExtractMethod | |
1644 | refactoring will append an empty return statement to the back of the method | |
1645 | call. Therefore, the analysis does not discriminate on either kinds of return | |
1646 | statements, with or without a return value. | |
1647 | ||
cb903a13 EK |
1648 | A throw statement is accepted anywhere a return statement is required. This is |
1649 | because a throw statement causes an immediate exit from the current block, | |
1650 | together with all outer blocks in its control flow that does not catch the | |
1651 | thrown exception. | |
15327961 EK |
1652 | |
1653 | Return statements can be either explicit or implicit. An \emph{explicit} return | |
1654 | statement is formed by using the \code{return} keyword, while an \emph{implicit} | |
d59e3ab7 EK |
1655 | return statement is a statement that is not formed using \code{return}, but must |
1656 | be the last statement of a method that can have any side effects. This can | |
1657 | happen in methods with a void return type. An example is a statement that is | |
15327961 | 1658 | inside one or more blocks. The last statement of a method could for instance be |
cb903a13 EK |
1659 | a synchronized statement, but the last statement that is executed in the method, |
1660 | and that can have any side effects, may be located inside the body of the | |
1661 | synchronized statement. | |
15327961 EK |
1662 | |
1663 | We can start the check for this property by looking at the last statement of a | |
d59e3ab7 EK |
1664 | selection to see if it is a return statement (explicit or implicit) or a throw |
1665 | statement. If this is the case, then the property holds, assuming the selected | |
1666 | code does not contain any compilation errors. All execution paths within the | |
1667 | selection should end in either this, or another, return or throw statement. | |
15327961 EK |
1668 | \todoin{State somewhere that we assume no compilation errors?} |
1669 | ||
1670 | If the last statement of the selection is not a return or throw, the execution | |
d59e3ab7 EK |
1671 | of it must eventually end in one for the selection to be legal. This means that |
1672 | all branches of the last statement of every branch must end in a return or | |
1673 | throw. Given this recursive definition, there are only five types of statements | |
1674 | that are guaranteed to end in a return or throw if their child branches does. | |
1675 | All other statements would have to be considered illegal. The first three: | |
1676 | Block-statements, labeled statements and do-statements are all kinds of | |
1677 | fall-through statements that always gets their body executed. Do-statements | |
1678 | would not make much sense if written such that they | |
15327961 EK |
1679 | always ends after the first round of execution of their body, but that is not |
1680 | our concern. The remaining two statements that can end in a return or throw are | |
1681 | if-statements and try-statements. | |
1682 | ||
cb903a13 | 1683 | For an if-statement, the rule is that if its then-part does not contain any |
d59e3ab7 | 1684 | return or throw statements, this is considered illegal. If the then-part does |
cb903a13 EK |
1685 | contain a return or throw, the else-part is checked. If its else-part is |
1686 | non-existent, or it does not contain any return or throw statements, the | |
1687 | statement is considered illegal. If an if-statement is not considered illegal, | |
d59e3ab7 | 1688 | the bodies of its two parts must be checked. |
15327961 | 1689 | |
cb903a13 EK |
1690 | Try-statements are handled much the same way as if-statements. The body of a |
1691 | try-statement must contain a return or throw. The same applies to its catch | |
1692 | clauses and finally body. | |
1c521a77 | 1693 | |
11d9e6e1 EK |
1694 | \subsection{Ambiguous return values} |
1695 | The problem with ambiguous return values arise when a selection is chosen to be | |
1696 | extracted into a new method, but it needs to return more than one value from | |
1697 | that method. | |
1698 | ||
1699 | This problem occurs in two situations. The first situation arise when there is | |
1700 | more than one local variable that is both assigned to within a selection and | |
1701 | also referenced after the selection. The other situation occur when there is | |
1702 | only one such assignment, but the selection also contain return statements. | |
1703 | ||
1704 | Therefore we must examine the selection for assignments to local variables that | |
1705 | are referenced after the text selection. Then we must verify that not more than | |
1706 | one such reference is done, or zero if any return statements are found. | |
1707 | ||
83f12332 EK |
1708 | \subsection{Illegal statements} |
1709 | An illegal statement may be a statement that is of a type that is never allowed, | |
8fa89d14 EK |
1710 | or it may be a statement of a type that is only allowed if certain conditions |
1711 | are true. | |
83f12332 EK |
1712 | |
1713 | Any use of the \var{super} keyword is prohibited, since its meaning is altered | |
1714 | when moving a method to another class. | |
1715 | ||
1716 | For a \emph{break} statement, there are two situations to consider: A break | |
1717 | statement with or without a label. If the break statement has a label, it is | |
1718 | checked that whole of the labeled statement is inside the selection. If the | |
1719 | break statement does not have a label attached to it, it is checked that its | |
1720 | innermost enclosing loop or switch statement also is inside the selection. | |
1721 | ||
1722 | The situation for a \emph{continue} statement is the same as for a break | |
1723 | statement, except that it is not allowed inside switch statements. | |
1724 | ||
1725 | Regarding \emph{assignments}, two types of assignments are allowed: Assignments | |
1726 | to non-final variables and assignments to array access. All other assignments | |
1727 | are regarded illegal. | |
1728 | ||
1729 | \todoin{Expand with more illegal statements and/or conclude that I did not have | |
9503a520 EK |
1730 | time to analyze all statement types.} |
1731 | ||
01d46361 EK |
1732 | \section{Disqualifying selections from the |
1733 | example}\label{sec:disqualifyingExample} | |
9503a520 EK |
1734 | Among the selections we found for the code in \myref{lst:grandExample}, not many |
1735 | of them must be disqualified on the basis of containing something illegal. The | |
1736 | only statement causing trouble is the break statement in line 18. None of the | |
1737 | selections on nesting level 2 can contain this break statement, since the | |
1738 | innermost switch statement is not inside any of these selections. | |
83f12332 | 1739 | |
9503a520 EK |
1740 | This means that the text selections $(18)$, $(16,18)$ and $(17,18)$ can be |
1741 | excluded from further consideration, and we are left with the following | |
1742 | selections. | |
81cf9554 | 1743 | |
9503a520 EK |
1744 | \begin{description} |
1745 | \item[Level 1 (10 selections)] \hfill \\ | |
1746 | $\{(5,9), (11), (12), (14,21), (5,11), (5,12), (5,21), (11,12), | |
1747 | (11,21), \\(12,21)\}$ | |
1748 | ||
1749 | \item[Level 2 (10 selections)] \hfill \\ | |
1750 | $\{(6), (7), (8), (6,7), (6,8), (7,8), (16), (17), (16,17), (20)\}$ | |
1751 | \end{description} | |
1752 | ||
1753 | \section{Finding a move target} | |
1754 | In the analysis needed to perform the \ExtractAndMoveMethod refactoring | |
1755 | automatically, the selection we choose is found among all the selections that | |
1756 | has a possible move target. Therefore, the best possible move target must be | |
1757 | found for all the candidate selections, so that we are able to sort out the | |
1758 | selection that is best suited for the refactoring. | |
1759 | ||
1760 | To find the best move target for a specific text selection, we first need to | |
1761 | find all the possible targets. Since the target must be a local variable or a | |
1762 | field, we are basically looking for names within the selection; names that | |
1763 | represents references to variables. | |
1764 | ||
1765 | The names we are looking for, we call prefixes. This is because we are not | |
1766 | interested in names that occur in the middle of a dot-separated sequence of | |
1767 | names. We are only interested in names that constitutes prefixes of other names, | |
1768 | possibly themselves. The reason for this, is that two lexically equal names need | |
1769 | not be referencing the same variable, if they themselves are not referenced via | |
1770 | the same prefix. Consider the two method calls \code{a.x.foo()} and | |
1771 | \code{b.x.foo()}. Here, the two references to \code{x}, in the middle of the | |
1772 | qualified names both preceding \code{foo()}, are not referencing the same | |
1773 | variable. Even though the variables may share the type, and the method | |
1774 | \method{foo} thus is the same for both, we would not know through which of the | |
1775 | variables \var{a} or \var{b} we should call the extracted method. | |
1776 | ||
1777 | The possible move targets are then the prefixes that are not among a subset of | |
1778 | the prefixes that are not valid move targets \see{s:unfixes}. Also, prefixes | |
1779 | that are just simple names, and have only one occurrence, are left out. This is | |
1780 | because they are not going to have any positive effect on coupling between | |
01d46361 | 1781 | classes, and are only going to increase the complexity of the code. |
9503a520 EK |
1782 | |
1783 | For finding the best move target among these safe prefixes, a simple heuristic | |
1784 | is used. It is as simple as choosing the prefix that is most frequently | |
1785 | referenced within the selection. | |
1786 | ||
1787 | \section{Unfixes}\label{s:unfixes} | |
1788 | The prefixes that are not valid as move targets are called unfixes. | |
1789 | ||
1790 | An unfix can be a name that is assigned to within a selection. The reason that | |
1791 | this cannot be allowed, is that the result would be an assignment to the | |
1792 | \type{this} keyword, which is not valid in Java \see{eclipse_bug_420726}. | |
1793 | ||
1794 | Prefixes that originates from variable declarations within the same selection | |
1795 | are also considered unfixes. This is because when a method is moved, it needs to | |
1796 | be called through a variable. If this variable is also declared within the | |
1797 | method that is to be moved, this obviously cannot be done. | |
1798 | ||
1799 | Also considered as unfixes are variable references that are of types that are | |
1800 | not suitable for moving methods to. This can either be because it is not | |
1801 | physically possible to move a method to the desired class or that it will cause | |
1802 | compilation errors by doing so. | |
1803 | ||
1804 | If the type binding for a name is not resolved it is considered and unfix. The | |
1805 | same applies to types that is only found in compiled code, so they have no | |
1806 | underlying source that is accessible to us. (E.g. the \type{java.lang.String} | |
1807 | class.) | |
1808 | ||
1809 | Interfaces types are not suitable as targets. This is simply because interfaces | |
1810 | in Java cannot contain methods with bodies. (This thesis does not deal with | |
1811 | features of Java versions later than Java 7. Java 8 has interfaces with default | |
1812 | implementations of methods.) | |
1813 | ||
1814 | Neither are local types allowed. This accounts for both local and anonymous | |
1815 | classes. Anonymous classes are effectively the same as interface types with | |
1816 | respect to unfixes. Local classes could in theory be used as targets, but this | |
1817 | is not possible due to limitations of the way the \refa{Extract and Move Method} | |
1818 | refactoring has to be implemented. The problem is that the refactoring is done | |
1819 | in two steps, so the intermediate state between the two refactorings would not | |
1820 | be legal Java code. In the intermediate step for the case where a local class is | |
1821 | the move target, the extracted method would need to take the local class as a | |
1822 | parameter. This new method would need to live in the scope of the declaring | |
1823 | class of the originating method. The local class would then not be in the scope | |
1824 | of the extracted method, thus bringing the source code into an illegal state. | |
1825 | One could imagine that the method was extracted and moved in one operation, | |
1826 | without an intermediate state. Then it would make sense to include variables | |
1827 | with types of local classes in the set of legal targets, since the local classes | |
1828 | would then be in the scopes of the method calls. If this makes any difference | |
1829 | for software metrics that measure coupling would be a different discussion. | |
1830 | ||
1831 | ||
1832 | \begin{listing}[htb] | |
1833 | \begin{multicols}{2} | |
1834 | \begin{minted}[frame=topline,label=Before,framesep=\mintedframesep]{java} | |
1835 | void declaresLocalClass() { | |
1836 | class LocalClass { | |
1837 | void foo() {} | |
1838 | void bar() {} | |
1839 | } | |
1840 | ||
1841 | LocalClass inst = | |
1842 | new LocalClass(); | |
1843 | inst.foo(); | |
1844 | inst.bar(); | |
1845 | } | |
1846 | \end{minted} | |
1847 | ||
1848 | \columnbreak | |
1849 | ||
1850 | \begin{minted}[frame=topline,label={After Extract | |
1851 | Method},framesep=\mintedframesep]{java} | |
1852 | void declaresLocalClass() { | |
1853 | class LocalClass { | |
1854 | void foo() {} | |
1855 | void bar() {} | |
1856 | } | |
1857 | ||
1858 | LocalClass inst = | |
1859 | new LocalClass(); | |
1860 | fooBar(inst); | |
1861 | } | |
81cf9554 | 1862 | |
9503a520 EK |
1863 | // Intermediate step |
1864 | void fooBar(LocalClass inst) { | |
1865 | inst.foo(); | |
1866 | inst.bar(); | |
1867 | } | |
1868 | \end{minted} | |
1869 | \end{multicols} | |
1870 | \caption{When the \refa{Extract and Move Method} tries to use a variable with a | |
1871 | local type as the move target, an intermediate step is performed that is not | |
1872 | allowed. Here: \type{LocalClass} is not in the scope of \method{fooBar} in its | |
1873 | intermediate location.} | |
1874 | \label{lst:extractMethod_LocalClass} | |
1875 | \end{listing} | |
83f12332 | 1876 | |
9503a520 | 1877 | The last class of names that are considered unfixes are names used in null |
01d46361 EK |
1878 | tests. These are tests that reads like this: if \code{<name>} equals \var{null} |
1879 | then do something. If allowing variables used in those kinds of expressions as | |
1880 | targets for moving methods, we would end up with code containing boolean | |
1881 | expressions like \code{this == null}, which would not be meaningful, since | |
1882 | \var{this} would never be \var{null}. | |
1883 | ||
4ea21603 | 1884 | \section{Finding the example selections that have possible targets} |
01d46361 EK |
1885 | We now pick up the thread from \myref{sec:disqualifyingExample} where we have a |
1886 | set of text selections that needs to be analyzed to find out if some of them are | |
1887 | suitable targets for the \ExtractAndMoveMethod refactoring. | |
1888 | ||
1889 | We start by analyzing the text selections for nesting level 2, because these | |
1890 | results can be used to reason about the selections for nesting level 1. First we | |
1891 | have all the single-statement selections. | |
1892 | ||
1893 | \begin{description} | |
1894 | \item[Selections $(6)$, $(8)$ and $(20)$.] \hfill \\ | |
1895 | All these selections have a prefix that contains a possible target, namely | |
e36eade0 EK |
1896 | the variable \var{a}. The problem is that the prefixes are only one segment |
1897 | long, and their frequency counts are only 1 as well. None of these | |
01d46361 EK |
1898 | selections are therefore considered as suitable candidates for the |
1899 | refactoring. | |
1900 | ||
1901 | \item[Selection $(7)$.] \hfill \\ | |
1902 | This selection contains the unfix \var{a}, and no other possible targets. | |
1903 | The reason for \var{a} being an unfix is that it is assigned to within the | |
1904 | selection. Selection $(7)$ is therefore unsuited as a refactoring candidate. | |
1905 | ||
1906 | \item[Selections $(16)$ and $(17)$.] \hfill \\ | |
1907 | These selections both have a possible target. The target for both selections | |
1908 | is the variable \var{b}. Both the prefixes have frequency 1. We denote this | |
1909 | with the new tuples $((16), \texttt{b.a}, f(1))$ and $((17), \texttt{b.a}, | |
1910 | f(1))$. They contain the selection, the prefix with the target and the | |
e36eade0 | 1911 | frequency for this prefix. |
01d46361 EK |
1912 | |
1913 | \end{description} | |
1914 | ||
1915 | Then we have all the text selections from level 2 that are composed of multiple | |
1916 | statements: | |
1917 | ||
1918 | \begin{description} | |
1919 | \item[Selections $(6,7)$, $(6,8)$ and $(7,8)$.] \hfill \\ | |
1920 | All these selections are disqualified for the reason that they contain the | |
1921 | unfix \var{a}, due to the assignment, and no other possible move targets. | |
1922 | ||
1923 | \item[Selection $(16,17)$.] \hfill \\ | |
e36eade0 EK |
1924 | This selection is the last selection that is not analyzed on nesting level |
1925 | 2. It contains only one possible move target, and that is the variable | |
1926 | \var{b}. It also contains only one prefix \var{b.a}, with frequency count | |
1927 | 2. Therefore we have a new candidate $((16,17), \texttt{b.a}, f(2))$. | |
01d46361 EK |
1928 | |
1929 | \end{description} | |
1930 | ||
1931 | Moving on to the text selections for nesting level 1, starting with the | |
1932 | single-statement selections: | |
1933 | ||
1934 | \begin{description} | |
1935 | \item[Selection $(5,9)$.] \hfill \\ | |
e36eade0 | 1936 | This selection contains two variable references that must be analyzed to see |
01d46361 EK |
1937 | if they are possible move candidates. The first one is the variable |
1938 | \var{bool}. This variable is of type \type{boolean}, that is a primary type | |
1939 | and therefore not possible to make any changes to. The second variable is | |
1940 | \var{a}. The variable \var{a} is an unfix in $(5,9)$, for the same reason as | |
1941 | in the selections $(6,7)$, $(7,8)$ and $(6,8)$. So selection $(5,9)$ | |
1942 | contains no possible move targets. | |
1943 | ||
1944 | \item[Selections $(11)$ and $(12)$.] \hfill \\ | |
1945 | These selections are disqualified for the same reasons as selections $(6)$ | |
1946 | and $(8)$. Their prefixes are one segment long and are referenced only one | |
1947 | time. | |
1948 | ||
1949 | \item[Selection $(14,21)$] \hfill \\ | |
1950 | This is the switch statement from \myref{lst:grandExample}. It contains the | |
1951 | relevant variable references \var{val}, \var{a} and \var{b}. The variable | |
1952 | \var{val} is a primary type, just as \var{bool}. The variable \var{a} is | |
1953 | only found in one statement, and in a prefix with only one segment, so it is | |
1954 | not considered to be a possible move target. The only variable left is | |
1955 | \var{b}. Just as in the selection $(16,17)$, \var{b} is part of the prefix | |
e36eade0 | 1956 | \code{b.a}, that has 2 appearances. We have found a new candidate $((14,21), |
01d46361 EK |
1957 | \texttt{b.a}, f(2))$. |
1958 | ||
1959 | \end{description} | |
1960 | ||
e36eade0 | 1961 | It remains to see if we get any new candidates by analyzing the multi-statement |
01d46361 | 1962 | text selections for nesting level 1: |
9503a520 | 1963 | |
01d46361 EK |
1964 | \begin{description} |
1965 | \item[Selections $(5,11)$ and $(5,12)$.] \hfill \\ | |
e36eade0 | 1966 | These selections are disqualified for the same reason as $(5,9)$. The only |
01d46361 EK |
1967 | possible move target \var{a} is an unfix. |
1968 | ||
1969 | \item[Selection $(5,21)$.] \hfill \\ | |
1970 | This is whole of the method body in \myref{lst:grandExample}. The variables | |
1971 | \var{a}, \var{bool} and \var{val} are either an unfix or primary types. The | |
1972 | variable \var{b} is the only possible move target, and we have, again, the | |
1973 | prefix \code{b.a} as the center of attention. The refactoring candidate is | |
1974 | $((5,21), \texttt{b.a}, f(2))$. | |
1975 | ||
1976 | \item[Selection $(11,12)$.] \hfill \\ | |
1977 | This small selection contains the prefix \code{a} with frequency 2, and no | |
1978 | unfixes. The candidate is $((11,12), \texttt{a}, f(2))$. | |
1979 | ||
1980 | \item[Selection $(11,21)$] \hfill \\ | |
1981 | This selection contains the selection $(11,12)$ in addition to the switch | |
1982 | statement. The selection has two possible move targets. The first one is | |
e36eade0 | 1983 | \var{b}, in a prefix with frequency 2. The second is \var{a}, although it |
01d46361 EK |
1984 | is in a simple prefix, it is referenced 3 times, and is therefore chosen |
1985 | as the target for the selection. The new candidate is $((11,21), | |
1986 | \texttt{a}, f(3))$. | |
1987 | ||
1988 | \item[Selection $(12,21)$.] \hfill \\ | |
1989 | This selection is similar to the previous $(11,21)$, only that \var{a} now | |
e36eade0 | 1990 | has frequency count 2. This means that the prefixes \code{a} and |
01d46361 EK |
1991 | \code{b.a} both have the count 2. Of the two, \code{b.a} is preferred, |
1992 | since it has more segments than \code{a}. Thus the candidate for this | |
1993 | selection is $((12,21), \texttt{b.a}, f(2))$. | |
1994 | ||
1995 | \end{description} | |
1996 | ||
1997 | For the method in \myref{lst:grandExample} we therefore have the following 8 | |
1998 | candidates for the \ExtractAndMoveMethod refactoring: $\{((16), \texttt{b.a}, | |
1999 | f(1)), ((17), \texttt{b.a}, f(1)), ((16,17), \texttt{b.a}, f(2)), ((14,21), | |
2000 | \texttt{b.a}, f(2)), \\ ((5,21), \texttt{b.a}, f(2)), ((11,12), \texttt{a}, | |
2001 | f(2)), ((11,21), \texttt{a}, f(3)), ((12,21), \texttt{b.a}, f(2))\}$. | |
2002 | ||
2003 | It now only remains to specify an order for these candidates, so we can choose | |
2004 | the most suitable one to refactor. | |
9503a520 | 2005 | |
01d46361 EK |
2006 | |
2007 | \section{Choosing the selection}\label{sec:choosingSelection} | |
9503a520 EK |
2008 | When choosing a selection between the text selections that have possible move |
2009 | targets, the selections need to be ordered. The criteria below are presented in | |
2010 | the order they are prioritized. If not one selection is favored over the other | |
2011 | for a concrete criterion, the selections are evaluated by the next criterion. | |
2012 | ||
2013 | \begin{enumerate} | |
2014 | \item The first criterion that is evaluated is whether a selection contains | |
01d46361 EK |
2015 | any unfixes or not. If selection \m{A} contains no unfixes, while selection |
2016 | \m{B} does, selection \m{A} is favored over selection \m{B}. This is | |
2017 | because, if we can, we want to avoid moving such as assignments and variable | |
2018 | declarations. This is done under the assumption that, if possible, avoiding | |
2019 | selections containing unfixes will make the code moved a little cleaner. | |
2020 | ||
2021 | \item The second criterion that is evaluated is whether a selection contains | |
2022 | multiple possible targets or not. If selection \m{A} has only one possible | |
2023 | target, and selection \m{B} has multiple, selection \m{A} is favored. If | |
2024 | both selections have multiple possible targets, they are considered equal | |
2025 | with respect to this criterion. The rationale for this heuristic is that we | |
2026 | would prefer not to introduce new couplings between classes when performing | |
2027 | the \ExtractAndMoveMethod refactoring. | |
9503a520 EK |
2028 | |
2029 | \item When evaluating this criterion, this is with the knowledge that | |
01d46361 EK |
2030 | selection \m{A} and \m{B} both have one possible target, or multiple |
2031 | possible targets. Then, if the move target candidate of selection \m{A} has | |
2032 | a higher reference count than the target candidate of selection \m{B}, | |
2033 | selection \m{A} is favored. The reason for this is that we would like to | |
2034 | move the selection that gets rid of the most references to another class. | |
9503a520 EK |
2035 | |
2036 | \item The last criterion is that if the frequencies of the targets chosen for | |
2037 | both selections are equal, the selection with the target that is part of the | |
2038 | prefix with highest number of segments is favored. This is done to favor | |
2039 | indirection. | |
2040 | ||
2041 | \end{enumerate} | |
2042 | ||
2043 | If none of the above mentioned criteria favors one selection over another, the | |
2044 | selections are considered to be equally good candidates for the | |
2045 | \ExtractAndMoveMethod refactoring. | |
0cc6a67d | 2046 | |
01d46361 EK |
2047 | \section{Concluding the example} |
2048 | For choosing one of the remaining selections, we need to order our candidates | |
2049 | after the criteria in the previous section. Below we have the candidates ordered | |
2050 | in descending order, with the ``best'' candidate first: | |
2051 | ||
2052 | \begin{multicols}{2} | |
2053 | \begin{enumerate} | |
2054 | \item $((16,17), \texttt{b.a}, f(2))$ | |
2055 | \item $((11,12), \texttt{a}, f(2))$ | |
2056 | \item $((16), \texttt{b.a}, f(1))$ | |
2057 | \item $((17), \texttt{b.a}, f(1))$ | |
2058 | ||
2059 | % With unfixes: | |
2060 | % Many possible targets | |
2061 | \item $((11,21), \texttt{a}, f(3))$ | |
2062 | \item $((5,21), \texttt{b.a}, f(2))$ | |
2063 | \item $((12,21), \texttt{b.a}, f(2))$ | |
2064 | \item $((14,21), \texttt{b.a}, f(2))$ | |
2065 | ||
2066 | \end{enumerate} | |
2067 | \end{multicols} | |
2068 | ||
2069 | \begin{comment} | |
2070 | 5 if (bool) { | |
2071 | 6 a.foo(); | |
2072 | 7 a = new A(); | |
2073 | 8 a.bar(); | |
2074 | 9 } | |
2075 | 10 | |
2076 | 11 a.foo(); | |
2077 | 12 a.bar(); | |
2078 | 13 | |
2079 | 14 switch (val) { | |
2080 | 15 case 1: | |
2081 | 16 b.a.foo(); | |
2082 | 17 b.a.bar(); | |
2083 | 18 break; | |
2084 | 19 default: | |
2085 | 20 a.foo(); | |
2086 | 21 } | |
2087 | \end{comment} | |
2088 | ||
9db7f4cf EK |
2089 | The candidates ordered 5-8 all have unfixes in them, therefore they are ordered |
2090 | last. None of the candidates ordered 1-4 have multiple possible targets. The | |
2091 | only interesting discussion is now why $(16,17)$ was favored over $(11,12)$. | |
2092 | This is because the prefix \code{b.a} enclosing the move target of selection | |
2093 | $(16,17)$ has one more segment than the prefix \code{a} of $(11,12)$. | |
2094 | ||
2095 | The selection is now extracted into a new method \method{gen\_123} and then | |
2096 | moved to class \type{B}, since that is the type of the variable \var{b} that is | |
2097 | the target from the chosen refactoring candidate. The name of the method has a | |
2098 | randomly generated suffix. In the refactoring implementation, the extracted | |
2099 | methods follow the pattern \code{generated\_<long>}, where \code{<long>} is a | |
2100 | pseudo-random long value. This is shortened here to make the example readable. | |
e36eade0 | 2101 | The result after the refactoring is shown in \myref{lst:grandExampleResult}. |
01d46361 EK |
2102 | |
2103 | \begin{listing}[htb] | |
2104 | \begin{multicols}{2} | |
2105 | \begin{minted}[linenos]{java} | |
2106 | class C { | |
9db7f4cf EK |
2107 | A a; B b; boolean bool; |
2108 | ||
2109 | void method(int val) { | |
2110 | if (bool) { | |
2111 | a.foo(); | |
2112 | a = new A(); | |
2113 | a.bar(); | |
2114 | } | |
2115 | ||
2116 | a.foo(); | |
2117 | a.bar(); | |
2118 | ||
2119 | switch (val) { | |
2120 | case 1: | |
2121 | b.gen_123(this); | |
2122 | break; | |
2123 | default: | |
2124 | a.foo(); | |
01d46361 | 2125 | } |
9db7f4cf | 2126 | } |
01d46361 EK |
2127 | } |
2128 | \end{minted} | |
2129 | ||
2130 | \columnbreak | |
2131 | ||
2132 | \begin{minted}[]{java} | |
2133 | public class B { | |
9db7f4cf | 2134 | A a; |
01d46361 | 2135 | |
9db7f4cf EK |
2136 | public void gen_123(C c) { |
2137 | a.foo(); | |
2138 | a.bar(); | |
2139 | } | |
01d46361 EK |
2140 | } |
2141 | \end{minted} | |
2142 | ||
2143 | \end{multicols} | |
9db7f4cf EK |
2144 | \caption{The result after refactoring the class \type{C} of |
2145 | \myref{lst:grandExample} with the \ExtractAndMoveMethod refactoring with | |
2146 | $((16,17), \texttt{b.a}, f(2))$ as input.} | |
01d46361 EK |
2147 | \label{lst:grandExampleResult} |
2148 | \end{listing} | |
2149 | ||
b851f7df EK |
2150 | \chapter{Refactorings in Eclipse JDT: Design and |
2151 | Shortcomings}\label{ch:jdt_refactorings} | |
5837a41f EK |
2152 | |
2153 | This chapter will deal with some of the design behind refactoring support in | |
b4d90424 EK |
2154 | \name{Eclipse}, and the JDT in specific. After which it will follow a section |
2155 | about shortcomings of the refactoring API in terms of composition of | |
2156 | refactorings. | |
055dca93 | 2157 | |
b0e80574 | 2158 | \section{Design} |
fe0a4c48 | 2159 | The refactoring world of \name{Eclipse} can in general be separated into two parts: The |
b289552b | 2160 | language independent part and the part written for a specific programming |
07e173d4 EK |
2161 | language -- the language that is the target of the supported refactorings. |
2162 | \todo{What about the language specific part?} | |
f041551b EK |
2163 | |
2164 | \subsection{The Language Toolkit} | |
1326eec6 EK |
2165 | The Language Toolkit\footnote{The content of this section is a mixture of |
2166 | written material from | |
2167 | \url{https://www.eclipse.org/articles/Article-LTK/ltk.html} and | |
2168 | \url{http://www.eclipse.org/articles/article.php?file=Article-Unleashing-the-Power-of-Refactoring/index.html}, | |
2169 | the LTK source code and my own memory.}, or LTK for short, is the framework that | |
fe0a4c48 | 2170 | is used to implement refactorings in \name{Eclipse}. It is language independent and |
1326eec6 EK |
2171 | provides the abstractions of a refactoring and the change it generates, in the |
2172 | form of the classes \typewithref{org.eclipse.ltk.core.refactoring}{Refactoring} | |
2173 | and \typewithref{org.eclipse.ltk.core.refactoring}{Change}. | |
2174 | ||
2175 | There are also parts of the LTK that is concerned with user interaction, but | |
2176 | they will not be discussed here, since they are of little value to us and our | |
2177 | use of the framework. We are primarily interested in the parts that can be | |
2178 | automated. | |
f041551b EK |
2179 | |
2180 | \subsubsection{The Refactoring Class} | |
2181 | The abstract class \type{Refactoring} is the core of the LTK framework. Every | |
2182 | refactoring that is going to be supported by the LTK have to end up creating an | |
2183 | instance of one of its subclasses. The main responsibilities of subclasses of | |
2184 | \type{Refactoring} is to implement template methods for condition checking | |
2185 | (\methodwithref{org.eclipse.ltk.core.refactoring.Refactoring}{checkInitialConditions} | |
2186 | and | |
2187 | \methodwithref{org.eclipse.ltk.core.refactoring.Refactoring}{checkFinalConditions}), | |
2188 | in addition to the | |
2189 | \methodwithref{org.eclipse.ltk.core.refactoring.Refactoring}{createChange} | |
07e173d4 EK |
2190 | method that creates and returns an instance of the \type{Change} class. |
2191 | ||
2192 | If the refactoring shall support that others participate in it when it is | |
2193 | executed, the refactoring has to be a processor-based | |
2194 | refactoring\typeref{org.eclipse.ltk.core.refactoring.participants.ProcessorBasedRefactoring}. | |
2195 | It then delegates to its given | |
2196 | \typewithref{org.eclipse.ltk.core.refactoring.participants}{RefactoringProcessor} | |
1326eec6 EK |
2197 | for condition checking and change creation. Participating in a refactoring can |
2198 | be useful in cases where the changes done to programming source code affects | |
2199 | other related resources in the workspace. This can be names or paths in | |
2200 | configuration files, or maybe one would like to perform additional logging of | |
2201 | changes done in the workspace. | |
f041551b EK |
2202 | |
2203 | \subsubsection{The Change Class} | |
07e173d4 EK |
2204 | This class is the base class for objects that is responsible for performing the |
2205 | actual workspace transformations in a refactoring. The main responsibilities for | |
2206 | its subclasses is to implement the | |
2207 | \methodwithref{org.eclipse.ltk.core.refactoring.Change}{perform} and | |
2208 | \methodwithref{org.eclipse.ltk.core.refactoring.Change}{isValid} methods. The | |
2209 | \method{isValid} method verifies that the change object is valid and thus can be | |
2210 | executed by calling its \method{perform} method. The \method{perform} method | |
2211 | performs the desired change and returns an undo change that can be executed to | |
2212 | reverse the effect of the transformation done by its originating change object. | |
2213 | ||
61420ef7 | 2214 | \subsubsection{Executing a Refactoring}\label{executing_refactoring} |
07e173d4 EK |
2215 | The life cycle of a refactoring generally follows two steps after creation: |
2216 | condition checking and change creation. By letting the refactoring object be | |
2217 | handled by a | |
2218 | \typewithref{org.eclipse.ltk.core.refactoring}{CheckConditionsOperation} that | |
2219 | in turn is handled by a | |
2220 | \typewithref{org.eclipse.ltk.core.refactoring}{CreateChangeOperation}, it is | |
2221 | assured that the change creation process is managed in a proper manner. | |
2222 | ||
2223 | The actual execution of a change object has to follow a detailed life cycle. | |
2224 | This life cycle is honored if the \type{CreateChangeOperation} is handled by a | |
2225 | \typewithref{org.eclipse.ltk.core.refactoring}{PerformChangeOperation}. If also | |
2226 | an undo manager\typeref{org.eclipse.ltk.core.refactoring.IUndoManager} is set | |
2227 | for the \type{PerformChangeOperation}, the undo change is added into the undo | |
2228 | history. | |
055dca93 | 2229 | |
b0e80574 | 2230 | \section{Shortcomings} |
80663734 | 2231 | This section is introduced naturally with a conclusion: The JDT refactoring |
5837a41f EK |
2232 | implementation does not facilitate composition of refactorings. |
2233 | \todo{refine}This section will try to explain why, and also identify other | |
2234 | shortcomings of both the usability and the readability of the JDT refactoring | |
2235 | source code. | |
80663734 EK |
2236 | |
2237 | I will begin at the end and work my way toward the composition part of this | |
2238 | section. | |
2239 | ||
5837a41f | 2240 | \subsection{Absence of Generics in Eclipse Source Code} |
80663734 | 2241 | This section is not only concerning the JDT refactoring API, but also large |
fe0a4c48 | 2242 | quantities of the \name{Eclipse} source code. The code shows a striking absence of the |
80663734 | 2243 | Java language feature of generics. It is hard to read a class' interface when |
5837a41f EK |
2244 | methods return objects or takes parameters of raw types such as \type{List} or |
2245 | \type{Map}. This sometimes results in having to read a lot of source code to | |
2246 | understand what is going on, instead of relying on the available interfaces. In | |
2247 | addition, it results in a lot of ugly code, making the use of typecasting more | |
2248 | of a rule than an exception. | |
2249 | ||
2250 | \subsection{Composite Refactorings Will Not Appear as Atomic Actions} | |
2251 | ||
2252 | \subsubsection{Missing Flexibility from JDT Refactorings} | |
2253 | The JDT refactorings are not made with composition of refactorings in mind. When | |
2254 | a JDT refactoring is executed, it assumes that all conditions for it to be | |
1326eec6 | 2255 | applied successfully can be found by reading source files that have been |
5837a41f EK |
2256 | persisted to disk. They can only operate on the actual source material, and not |
2257 | (in-memory) copies thereof. This constitutes a major disadvantage when trying to | |
1326eec6 EK |
2258 | compose refactorings, since if an exception occurs in the middle of a sequence |
2259 | of refactorings, it can leave the project in a state where the composite | |
2260 | refactoring was only partially executed. It makes it hard to discard the changes | |
5837a41f EK |
2261 | done without monitoring and consulting the undo manager, an approach that is not |
2262 | bullet proof. | |
2263 | ||
2264 | \subsubsection{Broken Undo History} | |
2265 | When designing a composed refactoring that is to be performed as a sequence of | |
2266 | refactorings, you would like it to appear as a single change to the workspace. | |
2267 | This implies that you would also like to be able to undo all the changes done by | |
2268 | the refactoring in a single step. This is not the way it appears when a sequence | |
2269 | of JDT refactorings is executed. It leaves the undo history filled up with | |
2270 | individual undo actions corresponding to every single JDT refactoring in the | |
fe0a4c48 | 2271 | sequence. This problem is not trivial to handle in \name{Eclipse} |
e123ab03 | 2272 | \see{hacking_undo_history}. |
5837a41f | 2273 | |
80663734 | 2274 | |
a7514fbd | 2275 | |
b0e80574 EK |
2276 | \chapter{Composite Refactorings in Eclipse} |
2277 | ||
2278 | \section{A Simple Ad Hoc Model} | |
fe0a4c48 | 2279 | As pointed out in \myref{ch:jdt_refactorings}, the \name{Eclipse} JDT refactoring model |
8b6b22c8 EK |
2280 | is not very well suited for making composite refactorings. Therefore a simple |
2281 | model using changer objects (of type \type{RefaktorChanger}) is used as an | |
fe0a4c48 | 2282 | abstraction layer on top of the existing \name{Eclipse} refactorings, instead of |
50954fde EK |
2283 | extending the \typewithref{org.eclipse.ltk.core.refactoring}{Refactoring} class. |
2284 | ||
2285 | The use of an additional abstraction layer is a deliberate choice. It is due to | |
2286 | the problem of creating a composite | |
2287 | \typewithref{org.eclipse.ltk.core.refactoring}{Change} that can handle text | |
2288 | changes that interfere with each other. Thus, a \type{RefaktorChanger} may, or | |
2289 | may not, take advantage of one or more existing refactorings, but it is always | |
2290 | intended to make a change to the workspace. | |
2291 | ||
2292 | \subsection{A typical \type{RefaktorChanger}} | |
2293 | The typical refaktor changer class has two responsibilities, checking | |
2294 | preconditions and executing the requested changes. This is not too different | |
2295 | from the responsibilities of an LTK refactoring, with the distinction that a | |
2296 | refaktor changer also executes the change, while an LTK refactoring is only | |
2297 | responsible for creating the object that can later be used to do the job. | |
2298 | ||
2299 | Checking of preconditions is typically done by an | |
2300 | \typewithref{no.uio.ifi.refaktor.analyze.analyzers}{Analyzer}. If the | |
2301 | preconditions validate, the upcoming changes are executed by an | |
2302 | \typewithref{no.uio.ifi.refaktor.change.executors}{Executor}. | |
b0e80574 EK |
2303 | |
2304 | \section{The Extract and Move Method Refactoring} | |
61420ef7 EK |
2305 | %The Extract and Move Method Refactoring is implemented mainly using these |
2306 | %classes: | |
2307 | %\begin{itemize} | |
2308 | % \item \type{ExtractAndMoveMethodChanger} | |
2309 | % \item \type{ExtractAndMoveMethodPrefixesExtractor} | |
2310 | % \item \type{Prefix} | |
2311 | % \item \type{PrefixSet} | |
2312 | %\end{itemize} | |
2313 | ||
2314 | \subsection{The Building Blocks} | |
2315 | This is a composite refactoring, and hence is built up using several primitive | |
b5c7bb1b EK |
2316 | refactorings. These basic building blocks are, as its name implies, the |
2317 | \ExtractMethod refactoring\citing{refactoring} and the \MoveMethod | |
fe0a4c48 | 2318 | refactoring\citing{refactoring}. In \name{Eclipse}, the implementations of these |
b5c7bb1b | 2319 | refactorings are found in the classes |
61420ef7 EK |
2320 | \typewithref{org.eclipse.jdt.internal.corext.refactoring.code}{ExtractMethodRefactoring} |
2321 | and | |
2322 | \typewithref{org.eclipse.jdt.internal.corext.refactoring.structure}{MoveInstanceMethodProcessor}, | |
2323 | where the last class is designed to be used together with the processor-based | |
2324 | \typewithref{org.eclipse.ltk.core.refactoring.participants}{MoveRefactoring}. | |
2325 | ||
2326 | \subsubsection{The ExtractMethodRefactoring Class} | |
2327 | This class is quite simple in its use. The only parameters it requires for | |
2328 | construction is a compilation | |
2329 | unit\typeref{org.eclipse.jdt.core.ICompilationUnit}, the offset into the source | |
2330 | code where the extraction shall start, and the length of the source to be | |
2331 | extracted. Then you have to set the method name for the new method together with | |
50954fde | 2332 | its visibility and some not so interesting parameters. |
61420ef7 EK |
2333 | |
2334 | \subsubsection{The MoveInstanceMethodProcessor Class} | |
fe0a4c48 EK |
2335 | For the \refa{Move Method}, the processor requires a little more advanced input than |
2336 | the class for the \refa{Extract Method}. For construction it requires a method | |
50954fde EK |
2337 | handle\typeref{org.eclipse.jdt.core.IMethod} for the method that is to be moved. |
2338 | Then the target for the move have to be supplied as the variable binding from a | |
2339 | chosen variable declaration. In addition to this, one have to set some | |
2340 | parameters regarding setters/getters, as well as delegation. | |
61420ef7 | 2341 | |
50954fde EK |
2342 | To make a working refactoring from the processor, one have to create a |
2343 | \type{MoveRefactoring} with it. | |
b0e80574 | 2344 | |
356782a0 | 2345 | \subsection{The ExtractAndMoveMethodChanger} |
50954fde | 2346 | |
61420ef7 | 2347 | The \typewithref{no.uio.ifi.refaktor.changers}{ExtractAndMoveMethodChanger} |
50954fde EK |
2348 | class is a subclass of the class |
2349 | \typewithref{no.uio.ifi.refaktor.changers}{RefaktorChanger}. It is responsible | |
2350 | for analyzing and finding the best target for, and also executing, a composition | |
fe0a4c48 | 2351 | of the \refa{Extract Method} and \refa{Move Method} refactorings. This particular changer is |
50954fde EK |
2352 | the one of my changers that is closest to being a true LTK refactoring. It can |
2353 | be reworked to be one if the problems with overlapping changes are resolved. The | |
2354 | changer requires a text selection and the name of the new method, or else a | |
2355 | method name will be generated. The selection has to be of the type | |
2356 | \typewithref{no.uio.ifi.refaktor.utils}{CompilationUnitTextSelection}. This | |
2357 | class is a custom extension to | |
2358 | \typewithref{org.eclipse.jface.text}{TextSelection}, that in addition to the | |
2359 | basic offset, length and similar methods, also carry an instance of the | |
2360 | underlying compilation unit handle for the selection. | |
2361 | ||
ae138b9d EK |
2362 | \subsubsection{The |
2363 | \type{ExtractAndMoveMethodAnalyzer}}\label{extractAndMoveMethodAnalyzer} | |
50954fde EK |
2364 | The analysis and precondition checking is done by the |
2365 | \typewithref{no.uio.ifi.refaktor.analyze.analyzers}{ExtractAnd\-MoveMethodAnalyzer}. | |
2366 | First is check whether the selection is a valid selection or not, with respect | |
2367 | to statement boundaries and that it actually contains any selections. Then it | |
2368 | checks the legality of both extracting the selection and also moving it to | |
95c0f364 EK |
2369 | another class. This checking of is performed by a range of checkers |
2370 | \see{checkers}. If the selection is approved as legal, it is analyzed to find | |
2371 | the presumably best target to move the extracted method to. | |
50954fde EK |
2372 | |
2373 | For finding the best suitable target the analyzer is using a | |
2374 | \typewithref{no.uio.ifi.refaktor.analyze.collectors}{PrefixesCollector} that | |
ae138b9d EK |
2375 | collects all the possible candidate targets for the refactoring. All the |
2376 | non-candidates is found by an | |
50954fde | 2377 | \typewithref{no.uio.ifi.refaktor.analyze.collectors}{UnfixesCollector} that |
b8fce5af | 2378 | collects all the targets that will give some kind of error if used. (For |
3ab3e132 | 2379 | details about the property collectors, see \myref{propertyCollectors}.) All |
b8fce5af | 2380 | prefixes (and unfixes) are represented by a |
a6415293 EK |
2381 | \typewithref{no.uio.ifi.refaktor.extractors}{Prefix}, and they are collected |
2382 | into sets of prefixes. The safe prefixes is found by subtracting from the set of | |
b8fce5af | 2383 | candidate prefixes the prefixes that is enclosing any of the unfixes. A prefix |
a6415293 | 2384 | is enclosing an unfix if the unfix is in the set of its sub-prefixes. As an |
01d46361 EK |
2385 | example, \code{``a.b''} is enclosing \code{``a''}, as is \code{``a''}. The safe |
2386 | prefixes is unified in a \type{PrefixSet}. If a prefix has only one occurrence, | |
2387 | and is a simple expression, it is considered unsuitable as a move target. This | |
2388 | occurs in statements such as \code{``a.foo()''}. For such statements it bares no | |
2389 | meaning to extract and move them. It only generates an extra method and the | |
2390 | calling of it. | |
50954fde | 2391 | |
fbaa89ce EK |
2392 | The most suitable target for the refactoring is found by finding the prefix with |
2393 | the most occurrences. If two prefixes have the same occurrence count, but they | |
01d46361 | 2394 | differ in the number of segments, the one with most segments is chosen. |
fbaa89ce | 2395 | |
c8088eec EK |
2396 | \subsubsection{The |
2397 | \type{ExtractAndMoveMethodExecutor}}\label{extractAndMoveMethodExecutor} | |
50954fde EK |
2398 | If the analysis finds a possible target for the composite refactoring, it is |
2399 | executed by an | |
2400 | \typewithref{no.uio.ifi.refaktor.change.executors}{ExtractAndMoveMethodExecutor}. | |
2401 | It is composed of the two executors known as | |
2402 | \typewithref{no.uio.ifi.refaktor.change.executors}{ExtractMethodRefactoringExecutor} | |
2403 | and | |
2404 | \typewithref{no.uio.ifi.refaktor.change.executors}{MoveMethodRefactoringExecutor}. | |
2405 | The \type{ExtractAndMoveMethodExecutor} is responsible for gluing the two | |
3727b75b | 2406 | together by feeding the \type{MoveMethod\-RefactoringExecutor} with the |
222d172b | 2407 | resources needed after executing the extract method refactoring. |
50954fde EK |
2408 | |
2409 | \subsubsection{The \type{ExtractMethodRefactoringExecutor}} | |
2410 | This executor is responsible for creating and executing an instance of the | |
2411 | \type{ExtractMethodRefactoring} class. It is also responsible for collecting | |
2412 | some post execution resources that can be used to find the method handle for the | |
2413 | extracted method, as well as information about its parameters, including the | |
2414 | variable they originated from. | |
2415 | ||
2416 | \subsubsection{The \type{MoveMethodRefactoringExecutor}} | |
2417 | This executor is responsible for creating and executing an instance of the | |
2418 | \type{MoveRefactoring}. The move refactoring is a processor-based refactoring, | |
fe0a4c48 | 2419 | and for the \refa{Move Method} refactoring it is the \type{MoveInstanceMethodProcessor} |
50954fde EK |
2420 | that is used. |
2421 | ||
2422 | The handle for the method to be moved is found on the basis of the information | |
fe0a4c48 | 2423 | gathered after the execution of the \refa{Extract Method} refactoring. The only |
50954fde EK |
2424 | information the \type{ExtractMethodRefactoring} is sharing after its execution, |
2425 | regarding find the method handle, is the textual representation of the new | |
2426 | method signature. Therefore it must be parsed, the strings for types of the | |
2427 | parameters must be found and translated to a form that can be used to look up | |
2428 | the method handle from its type handle. They have to be on the unresolved | |
2429 | form.\todo{Elaborate?} The name for the type is found from the original | |
2430 | selection, since an extracted method must end up in the same type as the | |
2431 | originating method. | |
2432 | ||
fe0a4c48 | 2433 | When analyzing a selection prior to performing the \refa{Extract Method} refactoring, a |
50954fde EK |
2434 | target is chosen. It has to be a variable binding, so it is either a field or a |
2435 | local variable/parameter. If the target is a field, it can be used with the | |
2436 | \type{MoveInstanceMethodProcessor} as it is, since the extracted method still is | |
2437 | in its scope. But if the target is local to the originating method, the target | |
2438 | that is to be used for the processor must be among its parameters. Thus the | |
2439 | target must be found among the extracted method's parameters. This is done by | |
2440 | finding the parameter information object that corresponds to the parameter that | |
2441 | was declared on basis of the original target's variable when the method was | |
2442 | extracted. (The extracted method must take one such parameter for each local | |
2443 | variable that is declared outside the selection that is extracted.) To match the | |
2444 | original target with the correct parameter information object, the key for the | |
2445 | information object is compared to the key from the original target's binding. | |
2446 | The source code must then be parsed to find the method declaration for the | |
2447 | extracted method. The new target must be found by searching through the | |
2448 | parameters of the declaration and choose the one that has the same type as the | |
2449 | old binding from the parameter information object, as well as the same name that | |
2450 | is provided by the parameter information object. | |
2451 | ||
2452 | ||
356782a0 EK |
2453 | \subsection{The |
2454 | SearchBasedExtractAndMoveMethodChanger}\label{searchBasedExtractAndMoveMethodChanger} | |
c8088eec EK |
2455 | The |
2456 | \typewithref{no.uio.ifi.refaktor.change.changers}{SearchBasedExtractAndMoveMethodChanger} | |
2457 | is a changer whose purpose is to automatically analyze a method, and execute the | |
2458 | \ExtractAndMoveMethod refactoring on it if it is a suitable candidate for the | |
2459 | refactoring. | |
2460 | ||
2461 | First, the \typewithref{no.uio.ifi.refaktor.analyze.analyzers}{SearchBasedExtractAndMoveMethodAnalyzer} is used | |
2462 | to analyze the method. If the method is found to be a candidate, the result from | |
2463 | the analysis is fed to the \type{ExtractAndMoveMethodExecutor}, whose job is to | |
2464 | execute the refactoring \see{extractAndMoveMethodExecutor}. | |
2465 | ||
2466 | \subsubsection{The SearchBasedExtractAndMoveMethodAnalyzer} | |
2467 | This analyzer is responsible for analyzing all the possible text selections of a | |
2468 | method and then choose the best result out of the analysis results that is, by | |
2469 | the analyzer, considered to be the potential candidates for the Extract and Move | |
2470 | Method refactoring. | |
2471 | ||
2472 | Before the analyzer is able to work with the text selections of a method, it | |
2473 | needs to generate them. To do this, it parses the method to obtain a | |
2474 | \type{MethodDeclaration} for it \see{astEclipse}. Then there is a statement | |
2475 | lists creator that creates statements lists of the different groups of | |
2476 | statements in the body of the method declaration. A text selections generator | |
2477 | generates text selections of all the statement lists for the analyzer to work | |
2478 | with. | |
2479 | ||
2480 | \paragraph{The statement lists creator} | |
b851f7df EK |
2481 | is responsible for generating lists of statements for all the possible nesting |
2482 | levels of statements in the method. The statement lists creator is implemented | |
2483 | as an AST visitor \see{astVisitor}. It generates lists of statements by visiting | |
2484 | all the blocks in the method declaration and stores their statements in a | |
2485 | collection of statement lists. In addition, it visits all of the other | |
2486 | statements that can have a statement as a child, such as the different control | |
2487 | structures and the labeled statement. | |
c8088eec EK |
2488 | |
2489 | The switch statement is the only kind of statement that is not straight forward | |
2490 | to obtain the child statements from. It stores all of its children in a flat | |
2491 | list. Its switch case statements are included in this list. This means that | |
2492 | there are potential statement lists between all of these case statements. The | |
2493 | list of statements from a switch statement is therefore traversed, and the | |
2494 | statements between the case statements are grouped as separate lists. | |
2495 | ||
44005fe3 EK |
2496 | The highlighted part of \myref{lst:grandExample} shows an example of how the |
2497 | statement lists creator would separate a method body into lists of statements. | |
c8088eec EK |
2498 | |
2499 | \paragraph{The text selections generator} generates text selections for each | |
b851f7df EK |
2500 | list of statements from the statement lists creator. The generator generates a |
2501 | text selection for every sub-sequence of statements in a list. For a sequence of | |
2502 | statements, the first statement and the last statement span out a text | |
2503 | selection. | |
c8088eec EK |
2504 | |
2505 | In practice, the text selections are calculated by only one traversal of the | |
2506 | statement list. There is a set of generated text selections. For each statement, | |
2507 | there is created a temporary set of selections, in addition to a text selection | |
2508 | based on the offset and length of the statement. This text selection is added to | |
2509 | the temporary set. Then the new selection is added with every selection from the | |
2510 | set of generated text selections. These new selections are added to the | |
2511 | temporary set. Then the temporary set of selections is added to the set of | |
2512 | generated text selections. The result of adding two text selections is a new | |
2513 | text selection spanned out by the two addends. | |
2514 | ||
53be7239 | 2515 | \begin{comment} |
0fa64de5 EK |
2516 | \begin{listing}[h] |
2517 | \def\charwidth{5.7pt} | |
2518 | \def\indent{4*\charwidth} | |
2519 | \def\lineheight{\baselineskip} | |
2520 | \def\mintedtop{\lineheight} | |
2521 | ||
2522 | \begin{tikzpicture}[overlay, yscale=-1] | |
2523 | \tikzstyle{overlaybox}=[fill=lightgray,opacity=0.2] | |
2524 | ||
2525 | \draw[overlaybox] (2*\charwidth,\mintedtop) rectangle | |
2526 | +(18*\charwidth,\lineheight); | |
2527 | ||
2528 | \draw[overlaybox] (2*\charwidth,\mintedtop+\lineheight) rectangle | |
2529 | +(18*\charwidth,\lineheight); | |
2530 | ||
2531 | \draw[overlaybox] (2*\charwidth,\mintedtop+3*\lineheight) rectangle | |
2532 | +(18*\charwidth,\lineheight); | |
2533 | ||
2534 | \draw[overlaybox] (\indent-3*\charwidth,\mintedtop) rectangle | |
2535 | +(20*\charwidth,2*\lineheight); | |
2536 | ||
2537 | \draw[overlaybox] (3*\charwidth,\mintedtop+\lineheight) rectangle | |
2538 | +(16*\charwidth,3*\lineheight); | |
2539 | ||
2540 | \draw[overlaybox] (\indent,\mintedtop) rectangle | |
2541 | +(14*\charwidth,4*\lineheight); | |
2542 | \end{tikzpicture} | |
2543 | \begin{minted}{java} | |
2544 | statement one; | |
2545 | statement two; | |
2546 | ... | |
2547 | statement k; | |
2548 | \end{minted} | |
2549 | \caption{Example of how the text selections generator would generate text | |
2550 | selections based on a lists of statements. Each highlighted rectangle | |
2551 | represents a text selection.} | |
2552 | \label{lst:textSelectionsExample} | |
2553 | \end{listing} | |
021508ad EK |
2554 | \todoin{fix \myref{lst:textSelectionsExample}? Text only? All |
2555 | sub-sequences\ldots} | |
53be7239 | 2556 | \end{comment} |
0fa64de5 | 2557 | |
ae138b9d EK |
2558 | \paragraph{Finding the candidate} for the refactoring is done by analyzing all |
2559 | the generated text selection with the \type{ExtractAndMoveMethodAnalyzer} | |
2560 | \see{extractAndMoveMethodAnalyzer}. If the analyzer generates a useful result, | |
2561 | an \type{ExtractAndMoveMethodCandidate} is created from it, that is kept in a | |
2562 | list of potential candidates. If no candidates are found, the | |
2563 | \type{NoTargetFoundException} is thrown. | |
2564 | ||
2565 | Since only one of the candidates can be chosen, the analyzer must sort out which | |
2566 | candidate to choose. The sorting is done by the static \method{sort} method of | |
2567 | \type{Collections}. The comparison in this sorting is done by an | |
2568 | \type{ExtractAndMoveMethodCandidateComparator}. | |
2569 | \todoin{Write about the | |
2570 | ExtractAndMoveMethodCandidateComparator/FavorNoUnfixesCandidateComparator} | |
2571 | ||
356782a0 | 2572 | |
b0e80574 | 2573 | \subsection{The Prefix Class} |
a6415293 EK |
2574 | This class exists mainly for holding data about a prefix, such as the expression |
2575 | that the prefix represents and the occurrence count of the prefix within a | |
2576 | selection. In addition to this, it has some functionality such as calculating | |
2577 | its sub-prefixes and intersecting it with another prefix. The definition of the | |
2578 | intersection between two prefixes is a prefix representing the longest common | |
2579 | expression between the two. | |
2580 | ||
b0e80574 | 2581 | \subsection{The PrefixSet Class} |
a6415293 EK |
2582 | A prefix set holds elements of type \type{Prefix}. It is implemented with the |
2583 | help of a \typewithref{java.util}{HashMap} and contains some typical set | |
2584 | operations, but it does not implement the \typewithref{java.util}{Set} | |
2585 | interface, since the prefix set does not need all of the functionality a | |
2586 | \type{Set} requires to be implemented. In addition It needs some other | |
2587 | functionality not found in the \type{Set} interface. So due to the relatively | |
2588 | limited use of prefix sets, and that it almost always needs to be referenced as | |
2589 | such, and not a \type{Set<Prefix>}, it remains as an ad hoc solution to a | |
2590 | concrete problem. | |
2591 | ||
2592 | There are two ways adding prefixes to a \type{PrefixSet}. The first is through | |
2593 | its \method{add} method. This works like one would expect from a set. It adds | |
2594 | the prefix to the set if it does not already contain the prefix. The other way | |
2595 | is to \emph{register} the prefix with the set. When registering a prefix, if the | |
2596 | set does not contain the prefix, it is just added. If the set contains the | |
2597 | prefix, its count gets incremented. This is how the occurrence count is handled. | |
2598 | ||
2599 | The prefix set also computes the set of prefixes that is not enclosing any | |
2600 | prefixes of another set. This is kind of a set difference operation only for | |
2601 | enclosing prefixes. | |
b0e80574 | 2602 | |
5837a41f EK |
2603 | \subsection{Hacking the Refactoring Undo |
2604 | History}\label{hacking_undo_history} | |
a6415293 | 2605 | \todoin{Where to put this section?} |
8fae7b44 EK |
2606 | |
2607 | As an attempt to make multiple subsequent changes to the workspace appear as a | |
2608 | single action (i.e. make the undo changes appear as such), I tried to alter | |
2609 | the undo changes\typeref{org.eclipse.ltk.core.refactoring.Change} in the history | |
2610 | of the refactorings. | |
2611 | ||
2612 | My first impulse was to remove the, in this case, last two undo changes from the | |
f041551b | 2613 | undo manager\typeref{org.eclipse.ltk.core.refactoring.IUndoManager} for the |
fe0a4c48 | 2614 | \name{Eclipse} refactorings, and then add them to a composite |
8fae7b44 EK |
2615 | change\typeref{org.eclipse.ltk.core.refactoring.CompositeChange} that could be |
2616 | added back to the manager. The interface of the undo manager does not offer a | |
2617 | way to remove/pop the last added undo change, so a possible solution could be to | |
4cb06723 EK |
2618 | decorate\citing{designPatterns} the undo manager, to intercept and collect the |
2619 | undo changes before delegating to the \method{addUndo} | |
f041551b | 2620 | method\methodref{org.eclipse.ltk.core.refactoring.IUndoManager}{addUndo} of the |
8fae7b44 EK |
2621 | manager. Instead of giving it the intended undo change, a null change could be |
2622 | given to prevent it from making any changes if run. Then one could let the | |
2623 | collected undo changes form a composite change to be added to the manager. | |
2624 | ||
2625 | There is a technical challenge with this approach, and it relates to the undo | |
2626 | manager, and the concrete implementation | |
94fc7e71 EK |
2627 | \typewithref{org.eclipse.ltk.internal.core.refactoring}{UndoManager2}. This |
2628 | implementation is designed in a way that it is not possible to just add an undo | |
2629 | change, you have to do it in the context of an active | |
8fae7b44 EK |
2630 | operation\typeref{org.eclipse.core.commands.operations.TriggeredOperations}. |
2631 | One could imagine that it might be possible to trick the undo manager into | |
2632 | believing that you are doing a real change, by executing a refactoring that is | |
2633 | returning a kind of null change that is returning our composite change of undo | |
94fc7e71 | 2634 | refactorings when it is performed. But this is not the way to go. |
8fae7b44 EK |
2635 | |
2636 | Apart from the technical problems with this solution, there is a functional | |
2637 | problem: If it all had worked out as planned, this would leave the undo history | |
2638 | in a dirty state, with multiple empty undo operations corresponding to each of | |
2639 | the sequentially executed refactoring operations, followed by a composite undo | |
2640 | change corresponding to an empty change of the workspace for rounding of our | |
2641 | composite refactoring. The solution to this particular problem could be to | |
2642 | intercept the registration of the intermediate changes in the undo manager, and | |
2643 | only register the last empty change. | |
2644 | ||
2645 | Unfortunately, not everything works as desired with this solution. The grouping | |
2646 | of the undo changes into the composite change does not make the undo operation | |
2647 | appear as an atomic operation. The undo operation is still split up into | |
2648 | separate undo actions, corresponding to the change done by its originating | |
2649 | refactoring. And in addition, the undo actions has to be performed separate in | |
2650 | all the editors involved. This makes it no solution at all, but a step toward | |
2651 | something worse. | |
2652 | ||
2653 | There might be a solution to this problem, but it remains to be found. The | |
2654 | design of the refactoring undo management is partly to be blamed for this, as it | |
2655 | it is to complex to be easily manipulated. | |
2656 | ||
b0e80574 | 2657 | |
0d7fbd88 | 2658 | |
2f2080fb | 2659 | |
03674629 | 2660 | \chapter{Analyzing Source Code in Eclipse} |
5308274d | 2661 | |
356782a0 | 2662 | \section{The Java model}\label{javaModel} |
fe0a4c48 | 2663 | The Java model of \name{Eclipse} is its internal representation of a Java project. It |
5308274d | 2664 | is light-weight, and has only limited possibilities for manipulating source |
fe0a4c48 | 2665 | code. It is typically used as a basis for the Package Explorer in \name{Eclipse}. |
5308274d EK |
2666 | |
2667 | The elements of the Java model is only handles to the underlying elements. This | |
2668 | means that the underlying element of a handle does not need to actually exist. | |
2669 | Hence the user of a handle must always check that it exist by calling the | |
2670 | \method{exists} method of the handle. | |
2671 | ||
8647eef7 EK |
2672 | The handles with descriptions is listed in \myref{tab:javaModel}, while the |
2673 | hierarchy of the Java Model is shown in \myref{fig:javaModel}. | |
4e468834 | 2674 | |
8647eef7 EK |
2675 | \begin{table}[htb] |
2676 | \caption{The elements of the Java Model\citing{vogelEclipseJDT2012}.} | |
2677 | \label{tab:javaModel} | |
4e468834 | 2678 | \centering |
4e468834 | 2679 | % sum must equal number of columns (3) |
8647eef7 EK |
2680 | \begin{tabularx}{\textwidth}{@{} L{0.7} L{1.1} L{1.2} @{}} |
2681 | \toprule | |
4e468834 EK |
2682 | \textbf{Project Element} & \textbf{Java Model element} & |
2683 | \textbf{Description} \\ | |
8647eef7 | 2684 | \midrule |
4e468834 | 2685 | Java project & \type{IJavaProject} & The Java project which contains all other objects. \\ |
8647eef7 | 2686 | \midrule |
4e468834 EK |
2687 | Source folder /\linebreak[2] binary folder /\linebreak[3] external library & |
2688 | \type{IPackageFragmentRoot} & Hold source or binary files, can be a folder | |
2689 | or a library (zip / jar file). \\ | |
8647eef7 | 2690 | \midrule |
4e468834 EK |
2691 | Each package & \type{IPackageFragment} & Each package is below the |
2692 | \type{IPackageFragmentRoot}, sub-packages are not leaves of the package, | |
2693 | they are listed directed under \type{IPackageFragmentRoot}. \\ | |
8647eef7 | 2694 | \midrule |
4e468834 EK |
2695 | Java Source file & \type{ICompilationUnit} & The Source file is always below |
2696 | the package node. \\ | |
8647eef7 EK |
2697 | \midrule |
2698 | Types / Fields /\linebreak[3] Methods & \type{IType} / \type{IField} | |
2699 | /\linebreak[3] \type{IMethod} & Types, fields and methods. \\ | |
2700 | \bottomrule | |
4e468834 | 2701 | \end{tabularx} |
4e468834 EK |
2702 | \end{table} |
2703 | ||
4e468834 | 2704 | |
5308274d EK |
2705 | \begin{figure}[h] |
2706 | \centering | |
2707 | \begin{tikzpicture}[% | |
2708 | grow via three points={one child at (0,-0.7) and | |
2709 | two children at (0,-0.7) and (0,-1.4)}, | |
2710 | edge from parent path={(\tikzparentnode.south west)+(0.5,0) |- | |
2711 | (\tikzchildnode.west)}] | |
2712 | \tikzstyle{every node}=[draw=black,thick,anchor=west] | |
2713 | \tikzstyle{selected}=[draw=red,fill=red!30] | |
2714 | \tikzstyle{optional}=[dashed,fill=gray!50] | |
2715 | \node {\type{IJavaProject}} | |
2716 | child { node {\type{IPackageFragmentRoot}} | |
2717 | child { node {\type{IPackageFragment}} | |
2718 | child { node {\type{ICompilationUnit}} | |
2719 | child { node {\type{IType}} | |
2720 | child { node {\type{\{ IType \}*}} | |
2721 | child { node {\type{\ldots}}} | |
2722 | } | |
2723 | child [missing] {} | |
2724 | child { node {\type{\{ IField \}*}}} | |
2725 | child { node {\type{IMethod}} | |
2726 | child { node {\type{\{ IType \}*}} | |
2727 | child { node {\type{\ldots}}} | |
2728 | } | |
2729 | } | |
2730 | child [missing] {} | |
2731 | child [missing] {} | |
2732 | child { node {\type{\{ IMethod \}*}}} | |
2733 | } | |
2734 | child [missing] {} | |
2735 | child [missing] {} | |
2736 | child [missing] {} | |
2737 | child [missing] {} | |
2738 | child [missing] {} | |
2739 | child [missing] {} | |
2740 | child [missing] {} | |
2741 | child { node {\type{\{ IType \}*}}} | |
2742 | } | |
2743 | child [missing] {} | |
2744 | child [missing] {} | |
2745 | child [missing] {} | |
2746 | child [missing] {} | |
2747 | child [missing] {} | |
2748 | child [missing] {} | |
2749 | child [missing] {} | |
2750 | child [missing] {} | |
2751 | child [missing] {} | |
2752 | child { node {\type{\{ ICompilationUnit \}*}}} | |
2753 | } | |
2754 | child [missing] {} | |
2755 | child [missing] {} | |
2756 | child [missing] {} | |
2757 | child [missing] {} | |
2758 | child [missing] {} | |
2759 | child [missing] {} | |
2760 | child [missing] {} | |
2761 | child [missing] {} | |
2762 | child [missing] {} | |
2763 | child [missing] {} | |
2764 | child [missing] {} | |
2765 | child { node {\type{\{ IPackageFragment \}*}}} | |
2766 | } | |
2767 | child [missing] {} | |
2768 | child [missing] {} | |
2769 | child [missing] {} | |
2770 | child [missing] {} | |
2771 | child [missing] {} | |
2772 | child [missing] {} | |
2773 | child [missing] {} | |
2774 | child [missing] {} | |
2775 | child [missing] {} | |
2776 | child [missing] {} | |
2777 | child [missing] {} | |
2778 | child [missing] {} | |
2779 | child [missing] {} | |
2780 | child { node {\type{\{ IPackageFragmentRoot \}*}}} | |
2781 | ; | |
2782 | \end{tikzpicture} | |
fe0a4c48 | 2783 | \caption{The Java model of \name{Eclipse}. ``\type{\{ SomeElement \}*}'' means |
8647eef7 | 2784 | ``\type{SomeElement} zero or more times``. For recursive structures, |
5308274d EK |
2785 | ``\type{\ldots}'' is used.} |
2786 | \label{fig:javaModel} | |
2787 | \end{figure} | |
2788 | ||
3ab3e132 | 2789 | \section{The Abstract Syntax Tree} |
fe0a4c48 | 2790 | \name{Eclipse} is following the common paradigm of using an abstract syntax tree for |
03674629 EK |
2791 | source code analysis and manipulation. |
2792 | ||
03674629 EK |
2793 | When parsing program source code into something that can be used as a foundation |
2794 | for analysis, the start of the process follows the same steps as in a compiler. | |
3ab3e132 | 2795 | This is all natural, because the way a compiler analyzes code is no different |
03674629 EK |
2796 | from how source manipulation programs would do it, except for some properties of |
2797 | code that is analyzed in the parser, and that they may be differing in what | |
4e468834 | 2798 | kinds of properties they analyze. Thus the process of translation source code |
03674629 | 2799 | into a structure that is suitable for analyzing, can be seen as a kind of |
65e213db EK |
2800 | interrupted compilation process \see{fig:interruptedCompilationProcess}. |
2801 | ||
2802 | \begin{figure}[h] | |
2803 | \centering | |
2804 | \tikzset{ | |
c876d1a4 | 2805 | base/.style={anchor=north, align=center, rectangle, minimum height=1.4cm}, |
65e213db | 2806 | basewithshadow/.style={base, drop shadow, fill=white}, |
c876d1a4 EK |
2807 | outlined/.style={basewithshadow, draw, rounded corners, minimum |
2808 | width=0.4cm}, | |
2809 | primary/.style={outlined, font=\bfseries}, | |
65e213db | 2810 | dashedbox/.style={outlined, dashed}, |
62563950 EK |
2811 | arrowpath/.style={black, align=center, font=\small}, |
2812 | processarrow/.style={arrowpath, ->, >=angle 90, shorten >=1pt}, | |
65e213db | 2813 | } |
62563950 | 2814 | \begin{tikzpicture}[node distance=1.3cm and 3cm, scale=1, every |
c876d1a4 | 2815 | node/.style={transform shape}] |
62563950 EK |
2816 | \node[base](AuxNode1){\small source code}; |
2817 | \node[primary, right=of AuxNode1, xshift=-2.5cm](Scanner){Scanner}; | |
c876d1a4 | 2818 | \node[primary, right=of Scanner, xshift=0.5cm](Parser){Parser}; |
72e039dc EK |
2819 | \node[dashedbox, below=of Parser](SemanticAnalyzer){Semantic\\Analyzer}; |
2820 | \node[dashedbox, left=of SemanticAnalyzer](SourceCodeOptimizer){Source | |
2821 | Code\\Optimizer}; | |
2822 | \node[dashedbox, below=of SourceCodeOptimizer | |
c876d1a4 | 2823 | ](CodeGenerator){Code\\Generator}; |
72e039dc EK |
2824 | \node[dashedbox, right=of CodeGenerator](TargetCodeOptimizer){Target |
2825 | Code\\Optimizer}; | |
2826 | \node[base, right=of TargetCodeOptimizer](AuxNode2){}; | |
c876d1a4 | 2827 | |
62563950 EK |
2828 | \draw[processarrow](AuxNode1) -- (Scanner); |
2829 | ||
2830 | \path[arrowpath] (Scanner) -- node [sloped](tokens){tokens}(Parser); | |
2831 | \draw[processarrow](Scanner) -- (tokens) -- (Parser); | |
2832 | ||
2833 | \path[arrowpath] (Parser) -- node (syntax){syntax | |
2834 | tree}(SemanticAnalyzer); | |
2835 | \draw[processarrow](Parser) -- (syntax) -- (SemanticAnalyzer); | |
2836 | ||
2837 | \path[arrowpath] (SemanticAnalyzer) -- node | |
2838 | [sloped](annotated){annotated\\tree}(SourceCodeOptimizer); | |
2839 | \draw[processarrow, dashed](SemanticAnalyzer) -- (annotated) -- | |
2840 | (SourceCodeOptimizer); | |
2841 | ||
2842 | \path[arrowpath] (SourceCodeOptimizer) -- node | |
2843 | (intermediate){intermediate code}(CodeGenerator); | |
2844 | \draw[processarrow, dashed](SourceCodeOptimizer) -- (intermediate) -- | |
2845 | (CodeGenerator); | |
2846 | ||
2847 | \path[arrowpath] (CodeGenerator) -- node [sloped](target1){target | |
c876d1a4 | 2848 | code}(TargetCodeOptimizer); |
62563950 EK |
2849 | \draw[processarrow, dashed](CodeGenerator) -- (target1) -- |
2850 | (TargetCodeOptimizer); | |
2851 | ||
2852 | \path[arrowpath](TargetCodeOptimizer) -- node [sloped](target2){target | |
c876d1a4 | 2853 | code}(AuxNode2); |
62563950 | 2854 | \draw[processarrow, dashed](TargetCodeOptimizer) -- (target2) (AuxNode2); |
65e213db | 2855 | \end{tikzpicture} |
72e039dc | 2856 | \caption{Interrupted compilation process. {\footnotesize (Full compilation |
52da2102 EK |
2857 | process borrowed from \emph{Compiler construction: principles and practice} |
2858 | by Kenneth C. Louden\citing{louden1997}.)}} | |
65e213db EK |
2859 | \label{fig:interruptedCompilationProcess} |
2860 | \end{figure} | |
2861 | ||
03674629 EK |
2862 | The process starts with a \emph{scanner}, or lexer. The job of the scanner is to |
2863 | read the source code and divide it into tokens for the parser. Therefore, it is | |
2864 | also sometimes called a tokenizer. A token is a logical unit, defined in the | |
2865 | language specification, consisting of one or more consecutive characters. In | |
3ab3e132 | 2866 | the Java language the tokens can for instance be the \var{this} keyword, a curly |
03674629 | 2867 | bracket \var{\{} or a \var{nameToken}. It is recognized by the scanner on the |
3ab3e132 | 2868 | basis of something equivalent of a regular expression. This part of the process |
03674629 EK |
2869 | is often implemented with the use of a finite automata. In fact, it is common to |
2870 | specify the tokens in regular expressions, that in turn is translated into a | |
2871 | finite automata lexer. This process can be automated. | |
2872 | ||
3ab3e132 | 2873 | The program component used to translate a stream of tokens into something |
03674629 EK |
2874 | meaningful, is called a parser. A parser is fed tokens from the scanner and |
2875 | performs an analysis of the structure of a program. It verifies that the syntax | |
2876 | is correct according to the grammar rules of a language, that is usually | |
2877 | specified in a context-free grammar, and often in a variant of the | |
fe0a4c48 | 2878 | \name{Backus--Naur |
03674629 EK |
2879 | Form}\footnote{\url{https://en.wikipedia.org/wiki/Backus-Naur\_Form}}. The |
2880 | result coming from the parser is in the form of an \emph{Abstract Syntax Tree}, | |
2881 | AST for short. It is called \emph{abstract}, because the structure does not | |
2882 | contain all of the tokens produced by the scanner. It only contain logical | |
2883 | constructs, and because it forms a tree, all kinds of parentheses and brackets | |
2884 | are implicit in the structure. It is this AST that is used when performing the | |
2885 | semantic analysis of the code. | |
2886 | ||
2887 | As an example we can think of the expression \code{(5 + 7) * 2}. The root of | |
fe0a4c48 | 2888 | this tree would in \name{Eclipse} be an \type{InfixExpression} with the operator |
d11bcf4d EK |
2889 | \var{TIMES}, and a left operand that is also an \type{InfixExpression} with the |
2890 | operator \var{PLUS}. The left operand \type{InfixExpression}, has in turn a left | |
2891 | operand of type \type{NumberLiteral} with the value \var{``5''} and a right | |
2892 | operand \type{NumberLiteral} with the value \var{``7''}. The root will have a | |
2893 | right operand of type \type{NumberLiteral} and value \var{``2''}. The AST for | |
2894 | this expression is illustrated in \myref{fig:astInfixExpression}. | |
2895 | ||
3ab3e132 EK |
2896 | Contrary to the Java Model, an abstract syntax tree is a heavy-weight |
2897 | representation of source code. It contains information about properties like | |
2898 | type bindings for variables and variable bindings for names. | |
4e468834 EK |
2899 | |
2900 | ||
d11bcf4d EK |
2901 | \begin{figure}[h] |
2902 | \centering | |
a1d68d95 | 2903 | \begin{tikzpicture}[scale=0.8] |
894dce0d | 2904 | \tikzset{level distance=40pt} |
a1d68d95 EK |
2905 | \tikzset{sibling distance=5pt} |
2906 | \tikzstyle{thescale}=[scale=0.8] | |
2907 | \tikzset{every tree node/.style={align=center}} | |
d11bcf4d | 2908 | \tikzset{edge from parent/.append style={thick}} |
a1d68d95 EK |
2909 | \tikzstyle{inode}=[rectangle,rounded corners,draw,fill=lightgray,drop |
2910 | shadow,align=center] | |
2911 | \tikzset{every internal node/.style={inode}} | |
894dce0d | 2912 | \tikzset{every leaf node/.style={draw=none,fill=none}} |
d11bcf4d | 2913 | |
894dce0d EK |
2914 | \Tree [.\type{InfixExpression} [.\type{InfixExpression} |
2915 | [.\type{NumberLiteral} \var{``5''} ] [.\type{Operator} \var{PLUS} ] | |
2916 | [.\type{NumberLiteral} \var{``7''} ] ] | |
d11bcf4d EK |
2917 | [.\type{Operator} \var{TIMES} ] |
2918 | [.\type{NumberLiteral} \var{``2''} ] | |
2919 | ] | |
2920 | \end{tikzpicture} | |
894dce0d | 2921 | \caption{The abstract syntax tree for the expression \code{(5 + 7) * 2}.} |
d11bcf4d EK |
2922 | \label{fig:astInfixExpression} |
2923 | \end{figure} | |
03674629 | 2924 | |
c8088eec | 2925 | \subsection{The AST in Eclipse}\label{astEclipse} |
fe0a4c48 | 2926 | In \name{Eclipse}, every node in the AST is a child of the abstract superclass |
03674629 EK |
2927 | \typewithref{org.eclipse.jdt.core.dom}{ASTNode}. Every \type{ASTNode}, among a |
2928 | lot of other things, provides information about its position and length in the | |
2929 | source code, as well as a reference to its parent and to the root of the tree. | |
2930 | ||
2931 | The root of the AST is always of type \type{CompilationUnit}. It is not the same | |
2932 | as an instance of an \type{ICompilationUnit}, which is the compilation unit | |
894dce0d | 2933 | handle of the Java model. The children of a \type{CompilationUnit} is an |
03674629 EK |
2934 | optional \type{PackageDeclaration}, zero or more nodes of type |
2935 | \type{ImportDecaration} and all its top-level type declarations that has node | |
2936 | types \type{AbstractTypeDeclaration}. | |
2937 | ||
2938 | An \type{AbstractType\-Declaration} can be one of the types | |
2939 | \type{AnnotationType\-Declaration}, \type{Enum\-Declaration} or | |
2940 | \type{Type\-Declaration}. The children of an \type{AbstractType\-Declaration} | |
2941 | must be a subtype of a \type{BodyDeclaration}. These subtypes are: | |
2942 | \type{AnnotationTypeMember\-Declaration}, \type{EnumConstant\-Declaration}, | |
2943 | \type{Field\-Declaration}, \type{Initializer} and \type{Method\-Declaration}. | |
2944 | ||
2945 | Of the body declarations, the \type{Method\-Declaration} is the most interesting | |
2946 | one. Its children include lists of modifiers, type parameters, parameters and | |
2947 | exceptions. It has a return type node and a body node. The body, if present, is | |
2948 | of type \type{Block}. A \type{Block} is itself a \type{Statement}, and its | |
2949 | children is a list of \type{Statement} nodes. | |
2950 | ||
2951 | There are too many types of the abstract type \type{Statement} to list up, but | |
2952 | there exists a subtype of \type{Statement} for every statement type of Java, as | |
2953 | one would expect. This also applies to the abstract type \type{Expression}. | |
2954 | However, the expression \type{Name} is a little special, since it is both used | |
2955 | as an operand in compound expressions, as well as for names in type declarations | |
2956 | and such. | |
2957 | ||
fe0a4c48 | 2958 | There is an overview of some of the structure of an \name{Eclipse} AST in |
94deee9e EK |
2959 | \myref{fig:astEclipse}. |
2960 | ||
e8173df5 EK |
2961 | \begin{figure}[h] |
2962 | \centering | |
5e5908eb | 2963 | \begin{tikzpicture}[scale=0.8] |
0f918507 EK |
2964 | \tikzset{level distance=50pt} |
2965 | \tikzset{sibling distance=5pt} | |
5e5908eb | 2966 | \tikzstyle{thescale}=[scale=0.8] |
e8173df5 | 2967 | \tikzset{every tree node/.style={align=center}} |
5e5908eb EK |
2968 | \tikzset{edge from parent/.append style={thick}} |
2969 | \tikzstyle{inode}=[rectangle,rounded corners,draw,fill=lightgray,drop | |
2970 | shadow,align=center] | |
2971 | \tikzset{every internal node/.style={inode}} | |
e8173df5 EK |
2972 | \tikzset{every leaf node/.style={draw=none,fill=none}} |
2973 | ||
e601ce99 EK |
2974 | \Tree [.\type{CompilationUnit} [.\type{[ PackageDeclaration ]} [.\type{Name} ] |
2975 | [.\type{\{ Annotation \}*} ] ] | |
2976 | [.\type{\{ ImportDeclaration \}*} [.\type{Name} ] ] | |
0f918507 | 2977 | [.\type{\{ AbstractTypeDeclaration \}+} [.\node(site){\type{\{ |
e601ce99 | 2978 | BodyDeclaration \}*}}; ] [.\type{SimpleName} ] ] |
e8173df5 | 2979 | ] |
e601ce99 | 2980 | \begin{scope}[shift={(0.5,-6)}] |
5e5908eb | 2981 | \node[inode,thescale](root){\type{MethodDeclaration}}; |
e601ce99 | 2982 | \node[inode,thescale](modifiers) at (4.5,-5){\type{\{ IExtendedModifier \}*} |
5e5908eb | 2983 | \\ {\footnotesize (Of type \type{Modifier} or \type{Annotation})}}; |
e601ce99 | 2984 | \node[inode,thescale](typeParameters) at (-6,-3.5){\type{\{ TypeParameter |
5e5908eb | 2985 | \}*}}; |
fbeec228 | 2986 | \node[inode,thescale](parameters) at (-5,-5){\type{\{ |
5e5908eb | 2987 | SingleVariableDeclaration \}*} \\ {\footnotesize (Parameters)}}; |
e601ce99 | 2988 | \node[inode,thescale](exceptions) at (5,-3){\type{\{ Name \}*} \\ |
5e5908eb | 2989 | {\footnotesize (Exceptions)}}; |
e601ce99 | 2990 | \node[inode,thescale](return) at (-6.5,-2){\type{Type} \\ {\footnotesize |
5e5908eb | 2991 | (Return type)}}; |
e601ce99 EK |
2992 | \begin{scope}[shift={(0,-5)}] |
2993 | \Tree [.\node(body){\type{[ Block ]} \\ {\footnotesize (Body)}}; | |
2994 | [.\type{\{ Statement \}*} [.\type{\{ Expression \}*} ] | |
2995 | [.\type{\{ Statement \}*} [.\type{\ldots} ]] | |
2996 | ] | |
2997 | ] | |
2998 | \end{scope} | |
0f918507 | 2999 | \end{scope} |
e601ce99 EK |
3000 | \draw[->,>=triangle 90,shorten >=1pt](root.east)..controls +(east:2) and |
3001 | +(south:1)..(site.south); | |
0f918507 | 3002 | |
5e5908eb EK |
3003 | \draw (root.south) -- (modifiers); |
3004 | \draw (root.south) -- (typeParameters); | |
3005 | \draw (root.south) -- ($ (parameters.north) + (2,0) $); | |
3006 | \draw (root.south) -- (exceptions); | |
3007 | \draw (root.south) -- (return); | |
3008 | \draw (root.south) -- (body); | |
3009 | ||
e8173df5 | 3010 | \end{tikzpicture} |
fe0a4c48 | 3011 | \caption{The format of the abstract syntax tree in \name{Eclipse}.} |
e8173df5 EK |
3012 | \label{fig:astEclipse} |
3013 | \end{figure} | |
94deee9e | 3014 | \todoin{Add more to the AST format tree? \myref{fig:astEclipse}} |
a2868580 | 3015 | |
b8fce5af | 3016 | \section{The ASTVisitor}\label{astVisitor} |
3ab3e132 EK |
3017 | So far, the only thing that has been addressed is how the data that is going to |
3018 | be the basis for our analysis is structured. Another aspect of it is how we are | |
3019 | going to traverse the AST to gather the information we need, so we can conclude | |
3020 | about the properties we are analysing. It is of course possible to start at the | |
3021 | top of the tree, and manually search through its nodes for the ones we are | |
3022 | looking for, but that is a bit inconvenient. To be able to efficiently utilize | |
3023 | such an approach, we would need to make our own framework for traversing the | |
3024 | tree and visiting only the types of nodes we are after. Luckily, this | |
fe0a4c48 | 3025 | functionality is already provided in \name{Eclipse}, by its |
50976f51 EK |
3026 | \typewithref{org.eclipse.jdt.core.dom}{ASTVisitor}. |
3027 | ||
fe0a4c48 EK |
3028 | The \name{Eclipse} AST, together with its \type{ASTVisitor}, follows the |
3029 | \pattern{Visitor} pattern\citing{designPatterns}. The intent of this design | |
3030 | pattern is to facilitate extending the functionality of classes without touching | |
3031 | the classes themselves. | |
0a8ca90c | 3032 | |
fe0a4c48 EK |
3033 | Let us say that there is a class hierarchy of elements. These elements all have |
3034 | a method \method{accept(Visitor visitor)}. In its simplest form, the | |
0a8ca90c EK |
3035 | \method{accept} method just calls the \method{visit} method of the visitor with |
3036 | itself as an argument, like this: \code{visitor.visit(this)}. For the visitors | |
3037 | to be able to extend the functionality of all the classes in the elements | |
3038 | hierarchy, each \type{Visitor} must have one visit method for each concrete | |
3039 | class in the hierarchy. Say the hierarchy consists of the concrete classes | |
3040 | \type{ConcreteElementA} and \type{ConcreteElementB}. Then each visitor must have | |
3041 | the (possibly empty) methods \method{visit(ConcreteElementA element)} and | |
3042 | \method{visit(ConcreteElementB element)}. This scenario is depicted in | |
3043 | \myref{fig:visitorPattern}. | |
50976f51 | 3044 | |
3572a8ac EK |
3045 | \begin{figure}[h] |
3046 | \centering | |
3047 | \tikzstyle{abstract}=[rectangle, draw=black, fill=white, drop shadow, text | |
3048 | centered, anchor=north, text=black, text width=6cm, every one node | |
3049 | part/.style={align=center, font=\bfseries\itshape}] | |
3050 | \tikzstyle{concrete}=[rectangle, draw=black, fill=white, drop shadow, text | |
3051 | centered, anchor=north, text=black, text width=6cm] | |
3052 | \tikzstyle{inheritarrow}=[->, >=open triangle 90, thick] | |
3053 | \tikzstyle{commentarrow}=[->, >=angle 90, dashed] | |
3054 | \tikzstyle{line}=[-, thick] | |
3055 | \tikzset{every one node part/.style={align=center, font=\bfseries}} | |
3056 | \tikzset{every second node part/.style={align=center, font=\ttfamily}} | |
3572a8ac EK |
3057 | |
3058 | \begin{tikzpicture}[node distance=1cm, scale=0.8, every node/.style={transform | |
3059 | shape}] | |
3060 | \node (Element) [abstract, rectangle split, rectangle split parts=2] | |
3061 | { | |
3062 | \nodepart{one}{Element} | |
3063 | \nodepart{second}{+accept(visitor: Visitor)} | |
3064 | }; | |
3065 | \node (AuxNode01) [text width=0, minimum height=2cm, below=of Element] {}; | |
3066 | \node (ConcreteElementA) [concrete, rectangle split, rectangle split | |
3067 | parts=2, left=of AuxNode01] | |
3068 | { | |
3069 | \nodepart{one}{ConcreteElementA} | |
3070 | \nodepart{second}{+accept(visitor: Visitor)} | |
3071 | }; | |
3072 | \node (ConcreteElementB) [concrete, rectangle split, rectangle split | |
3073 | parts=2, right=of AuxNode01] | |
3074 | { | |
3075 | \nodepart{one}{ConcreteElementB} | |
3076 | \nodepart{second}{+accept(visitor: Visitor)} | |
3077 | }; | |
3078 | ||
3079 | \node[comment, below=of ConcreteElementA] (CommentA) {visitor.visit(this)}; | |
3080 | ||
3081 | \node[comment, below=of ConcreteElementB] (CommentB) {visitor.visit(this)}; | |
3082 | ||
3083 | \node (AuxNodeX) [text width=0, minimum height=1cm, below=of AuxNode01] {}; | |
3084 | ||
3085 | \node (Visitor) [abstract, rectangle split, rectangle split parts=2, | |
3086 | below=of AuxNodeX] | |
3087 | { | |
3088 | \nodepart{one}{Visitor} | |
3089 | \nodepart{second}{+visit(ConcreteElementA)\\+visit(ConcreteElementB)} | |
3090 | }; | |
3091 | \node (AuxNode02) [text width=0, minimum height=2cm, below=of Visitor] {}; | |
3092 | \node (ConcreteVisitor1) [concrete, rectangle split, rectangle split | |
3093 | parts=2, left=of AuxNode02] | |
3094 | { | |
3095 | \nodepart{one}{ConcreteVisitor1} | |
3096 | \nodepart{second}{+visit(ConcreteElementA)\\+visit(ConcreteElementB)} | |
3097 | }; | |
3098 | \node (ConcreteVisitor2) [concrete, rectangle split, rectangle split | |
3099 | parts=2, right=of AuxNode02] | |
3100 | { | |
3101 | \nodepart{one}{ConcreteVisitor2} | |
3102 | \nodepart{second}{+visit(ConcreteElementA)\\+visit(ConcreteElementB)} | |
3103 | }; | |
3104 | ||
3105 | ||
3106 | \draw[inheritarrow] (ConcreteElementA.north) -- ++(0,0.7) -| | |
3107 | (Element.south); | |
3108 | \draw[line] (ConcreteElementA.north) -- ++(0,0.7) -| | |
3109 | (ConcreteElementB.north); | |
3110 | ||
3111 | \draw[inheritarrow] (ConcreteVisitor1.north) -- ++(0,0.7) -| | |
3112 | (Visitor.south); | |
3113 | \draw[line] (ConcreteVisitor1.north) -- ++(0,0.7) -| | |
3114 | (ConcreteVisitor2.north); | |
3115 | ||
3116 | \draw[commentarrow] (CommentA.north) -- (ConcreteElementA.south); | |
3117 | \draw[commentarrow] (CommentB.north) -- (ConcreteElementB.south); | |
3118 | ||
3119 | ||
3120 | \end{tikzpicture} | |
3121 | \caption{The Visitor Pattern.} | |
3122 | \label{fig:visitorPattern} | |
3123 | \end{figure} | |
3124 | ||
0a8ca90c EK |
3125 | The use of the visitor pattern can be appropriate when the hierarchy of elements |
3126 | is mostly stable, but the family of operations over its elements is constantly | |
fe0a4c48 | 3127 | growing. This is clearly the case for the \name{Eclipse} AST, since the hierarchy of |
0a8ca90c EK |
3128 | type \type{ASTNode} is very stable, but the functionality of its elements is |
3129 | extended every time someone needs to operate on the AST. Another aspect of the | |
fe0a4c48 | 3130 | \name{Eclipse} implementation is that it is a public API, and the visitor pattern is an |
0a8ca90c EK |
3131 | easy way to provide access to the nodes in the tree. |
3132 | ||
fe0a4c48 | 3133 | The version of the visitor pattern implemented for the AST nodes in \name{Eclipse} also |
0a8ca90c EK |
3134 | provides an elegant way to traverse the tree. It does so by following the |
3135 | convention that every node in the tree first let the visitor visit itself, | |
b3adff95 EK |
3136 | before it also makes all its children accept the visitor. The children are only |
3137 | visited if the visit method of their parent returns \var{true}. This pattern | |
3138 | then makes for a prefix traversal of the AST. If postfix traversal is desired, | |
3139 | the visitors also has \method{endVisit} methods for each node type, that is | |
3140 | called after the \method{visit} method for a node. In addition to these visit | |
3141 | methods, there are also the methods \method{preVisit(ASTNode)}, | |
3142 | \method{postVisit(ASTNode)} and \method{preVisit2(ASTNode)}. The | |
3143 | \method{preVisit} method is called before the type-specific \method{visit} | |
3144 | method. The \method{postVisit} method is called after the type-specific | |
3145 | \method{endVisit}. The type specific \method{visit} is only called if | |
3146 | \method{preVisit2} returns \var{true}. Overriding the \method{preVisit2} is also | |
3147 | altering the behavior of \method{preVisit}, since the default implementation is | |
94c59647 EK |
3148 | responsible for calling it. |
3149 | ||
3150 | An example of a trivial \type{ASTVisitor} is shown in | |
3151 | \myref{lst:astVisitorExample}. | |
3152 | ||
3153 | \begin{listing} | |
3154 | \begin{minted}{java} | |
3155 | public class CollectNamesVisitor extends ASTVisitor { | |
3156 | Collection<Name> names = new LinkedList<Name>(); | |
3157 | ||
3158 | @Override | |
3159 | public boolean visit(QualifiedName node) { | |
3160 | names.add(node); | |
3161 | return false; | |
3162 | } | |
3163 | ||
3164 | @Override | |
3165 | public boolean visit(SimpleName node) { | |
3166 | names.add(node); | |
3167 | return true; | |
3168 | } | |
3169 | } | |
3170 | \end{minted} | |
3171 | \caption{An \type{ASTVisitor} that visits all the names in a subtree and adds | |
3172 | them to a collection, except those names that are children of any | |
3173 | \type{QualifiedName}.} | |
3174 | \label{lst:astVisitorExample} | |
3175 | \end{listing} | |
3176 | ||
b8fce5af EK |
3177 | \section{Property collectors}\label{propertyCollectors} |
3178 | The prefixes and unfixes are found by property | |
3179 | collectors\typeref{no.uio.ifi.refaktor.extractors.collectors.PropertyCollector}. | |
3180 | A property collector is of the \type{ASTVisitor} type, and thus visits nodes of | |
3181 | type \type{ASTNode} of the abstract syntax tree \see{astVisitor}. | |
3182 | ||
3183 | \subsection{The PrefixesCollector} | |
3184 | The \typewithref{no.uio.ifi.refaktor.extractors.collectors}{PrefixesCollector} | |
ccd252c5 | 3185 | finds prefixes that makes up the basis for calculating move targets for the |
fe0a4c48 | 3186 | \refa{Extract and Move Method} refactoring. It visits expression |
b8fce5af EK |
3187 | statements\typeref{org.eclipse.jdt.core.dom.ExpressionStatement} and creates |
3188 | prefixes from its expressions in the case of method invocations. The prefixes | |
3189 | found is registered with a prefix set, together with all its sub-prefixes. | |
3190 | ||
3191 | \subsection{The UnfixesCollector}\label{unfixes} | |
3192 | The \typewithref{no.uio.ifi.refaktor.extractors.collectors}{UnfixesCollector} | |
47c0bea8 | 3193 | finds unfixes within a selection. |
5230243c | 3194 | \todoin{Give more technical detail?} |
b8fce5af | 3195 | |
b8fce5af | 3196 | |
0a8ca90c | 3197 | |
5195bf0c | 3198 | \subsection{The ContainsReturnStatementCollector} |
21506154 | 3199 | \todoin{Remove section?} |
5195bf0c EK |
3200 | The |
3201 | \typewithref{no.uio.ifi.refaktor.analyze.collectors}{ContainsReturnStatementCollector} | |
3202 | is a very simple property collector. It only visits the return statements within | |
3203 | a selection, and can report whether it encountered a return statement or not. | |
3204 | ||
b8d069e4 EK |
3205 | \subsection{The LastStatementCollector} |
3206 | The \typewithref{no.uio.ifi.refaktor.analyze.collectors}{LastStatementCollector} | |
3207 | collects the last statement of a selection. It does so by only visiting the top | |
3208 | level statements of the selection, and compares the textual end offset of each | |
3ab3e132 | 3209 | encountered statement with the end offset of the previous statement found. |
b8d069e4 | 3210 | |
95c0f364 | 3211 | \section{Checkers}\label{checkers} |
d6f8e65a EK |
3212 | The checkers are a range of classes that checks that text selections complies |
3213 | with certain criteria. All checkers operates under the assumption that the code | |
3214 | they check is free from compilation errors. If a | |
95c0f364 EK |
3215 | \typewithref{no.uio.ifi.refaktor.analyze.analyzers}{Checker} fails, it throws a |
3216 | \type{CheckerException}. The checkers are managed by the | |
3217 | \type{LegalStatementsChecker}, which does not, in fact, implement the | |
3218 | \type{Checker} interface. It does, however, run all the checkers registered with | |
3219 | it, and reports that all statements are considered legal if no | |
08cbba3b | 3220 | \type{CheckerException} is thrown. Many of the checkers either extends the |
f72f72f1 | 3221 | \type{PropertyCollector} or utilizes one or more property collectors to verify |
3ab3e132 | 3222 | some criteria. The checkers registered with the \type{LegalStatementsChecker} |
f72f72f1 | 3223 | are described next. They are run in the order presented below. |
95c0f364 | 3224 | |
a22915d0 | 3225 | \subsection{The CallToProtectedOrPackagePrivateMethodChecker} |
21506154 EK |
3226 | This checker is used to check that at selection does not contain a call to a |
3227 | method that is protected or package-private. Such a method either has the access | |
3228 | modifier \code{protected} or it has no access modifier. | |
a22915d0 | 3229 | |
21506154 EK |
3230 | The workings of the \type{CallToProtectedOrPackagePrivateMethod\-Checker} is |
3231 | very simple. It looks for calls to methods that are either protected or | |
3232 | package-private within the selection, and throws an | |
3233 | \type{IllegalExpressionFoundException} if one is found. | |
a22915d0 | 3234 | |
1fceb439 EK |
3235 | \subsection{The DoubleClassInstanceCreationChecker} |
3236 | The \type{DoubleClassInstanceCreationChecker} checks that there are no double | |
3237 | class instance creations where the inner constructor call take and argument that | |
3238 | is built up using field references. | |
3239 | ||
3240 | The checker visits all nodes of type \type{ClassInstanceCreation} within a | |
3241 | selection. For all of these nodes, if its parent also is a class instance | |
3242 | creation, it accepts a visitor that throws a | |
e36eade0 | 3243 | \type{IllegalExpressionFoundException} if it encounters a name that is a field |
1fceb439 EK |
3244 | reference. |
3245 | ||
2a4b8dea | 3246 | \subsection{The InstantiationOfNonStaticInnerClassChecker} |
c6102ec2 EK |
3247 | The \type{InstantiationOfNonStaticInnerClassChecker} checks that selections |
3248 | does not contain instantiations of non-static inner classes. The | |
58467004 | 3249 | \type{MoveInstanceMethodProcessor} in \name{Eclipse} does not handle such |
c6102ec2 EK |
3250 | instantiations gracefully when moving a method. This problem is also related to |
3251 | bug\ldots \todoin{File Eclipse bug report} | |
2a4b8dea | 3252 | |
8d0caf4c | 3253 | \subsection{The EnclosingInstanceReferenceChecker} |
98b06ec2 EK |
3254 | The purpose of this checker is to verify that the names in a text selection are |
3255 | not referencing any enclosing instances. In theory, the underlying problem could | |
3256 | be solved in some situations, but our dependency on the | |
8d0caf4c EK |
3257 | \type{MoveInstanceMethodProcessor} prevents this. |
3258 | ||
3259 | The | |
3260 | \typewithref{no.uio.ifi.refaktor.analyze.analyzers}{EnclosingInstanceReferenceChecker} | |
3261 | is a modified version of the | |
801ff00a | 3262 | \typewithref{org.eclipse.jdt.internal.corext.refactoring.structure.MoveInstanceMethod\-Processor}{EnclosingInstanceReferenceFinder} |
8d0caf4c | 3263 | from the \type{MoveInstanceMethodProcessor}. Wherever the |
98b06ec2 EK |
3264 | \type{EnclosingInstanceReferenceFinder} would create a fatal error status, the |
3265 | checker will throw a \type{CheckerException}. | |
3266 | ||
3267 | The checker works by first finding all of the enclosing types of a selection. | |
3268 | Thereafter, it visits all the simple names of the selection to check that they | |
3269 | are not references to variables or methods declared in any of the enclosing | |
3270 | types. In addition, the checker visits \var{this}-expressions to verify that no | |
3271 | such expressions are qualified with any name. | |
8d0caf4c | 3272 | |
9cc2cd59 | 3273 | \subsection{The ReturnStatementsChecker}\label{returnStatementsChecker} |
d59e3ab7 EK |
3274 | The checker for return statements is meant to verify that a text selection is |
3275 | consistent regarding return statements. | |
3276 | ||
3277 | If the selection is free from return statements, then the checker validates. So | |
3278 | this is the first thing the checker investigates. | |
d6f8e65a | 3279 | |
e36eade0 | 3280 | If the checker proceeds any further, it is because the selection contains one |
d6f8e65a | 3281 | or more return statements. The next test is therefore to check if the last |
d59e3ab7 EK |
3282 | statement of the selection ends in either a return or a throw statement. The |
3283 | responsibility for checking that the last statement of the selection eventually | |
3284 | ends in a return or throw statement, is put on the | |
801ff00a | 3285 | \type{LastStatementOfSelectionEndsInReturnOrThrowChecker}. For every node |
d59e3ab7 | 3286 | visited, if the node is a statement, it does a test to see if the statement is a |
801ff00a EK |
3287 | return, a throw or if it is an implicit return statement. If this is the case, |
3288 | no further checking is done. This checking is done in the \code{preVisit2} | |
3289 | method \see{astVisitor}. If the node is not of a type that is being handled by | |
d59e3ab7 | 3290 | its type-specific visit method, the checker performs a simple test. If the node |
801ff00a EK |
3291 | being visited is not the last statement of its parent that is also enclosed by |
3292 | the selection, an \type{IllegalStatementFoundException} is thrown. This ensures | |
3293 | that all statements are taken care of, one way or the other. It also ensures | |
3294 | that the checker is conservative in the way it checks for legality of the | |
3295 | selection. | |
3296 | ||
3297 | To examine if a statement is an implicit return statement, the checker first | |
3298 | finds the last statement declared in its enclosing method. If this statement is | |
3299 | the same as the one under investigation, it is considered an implicit return | |
3300 | statement. If the statements are not the same, the checker does a search to see | |
d59e3ab7 | 3301 | if the statement examined is also the last statement of the method that can be |
801ff00a EK |
3302 | reached. This includes the last statement of a block statement, a labeled |
3303 | statement, a synchronized statement or a try statement, that in turn is the last | |
d59e3ab7 EK |
3304 | statement enclosed by one of the statement types listed. This search goes |
3305 | through all the parents of a statement until a statement is found that is not | |
3306 | one of the mentioned acceptable parent statements. If the search ends in a | |
3307 | method declaration, then the statement is considered to be the last reachable | |
3308 | statement of the method, and thus it is an implicit return statement. | |
3309 | ||
3310 | There are two kinds of statements that are handled explicitly: If-statements and | |
3311 | try-statements. Block, labeled and do-statements are handled by fall-through to | |
3312 | the other two. | |
3313 | ||
3314 | If-statements are handled explicitly by overriding their type-specific visit | |
3315 | method. If the then-part does not contain any return or throw statements an | |
3316 | \type{IllegalStatementFoundException} is thrown. If it does contain a return or | |
801ff00a | 3317 | throw, its else-part is checked. If the else-part is non-existent, or it does |
d59e3ab7 EK |
3318 | not contain any return or throw statements an exception is thrown. If no |
3319 | exception is thrown while visiting the if-statement, its children are visited. | |
801ff00a | 3320 | |
d59e3ab7 | 3321 | A try-statement is checked very similar to an if-statement. Its body must |
801ff00a | 3322 | contain a return or throw. The same applies to its catch clauses and finally |
d59e3ab7 | 3323 | body. Failure to validate produces an \type{IllegalStatementFoundException}. |
801ff00a EK |
3324 | |
3325 | If the checker does not complain at any point, the selection is considered valid | |
3326 | with respect to return statements. | |
41cde50e EK |
3327 | |
3328 | \subsection{The AmbiguousReturnValueChecker} | |
5230243c | 3329 | This checker verifies that there are no ambiguous return values in a selection. |
9cc2cd59 | 3330 | |
5230243c | 3331 | First, the checker needs to collect some data. Those data are the binding keys |
9cc2cd59 EK |
3332 | for all simple names that are assigned to within the selection, including |
3333 | variable declarations, but excluding fields. The checker also collects whether | |
3334 | there exists a return statement in the selection or not. No further checks of | |
3335 | return statements are needed, since, at this point, the selection is already | |
3336 | checked for illegal return statements \see{returnStatementsChecker}. | |
3337 | ||
3338 | After the binding keys of the assignees are collected, the checker searches the | |
3339 | part of the enclosing method that is after the selection for references whose | |
3ab3e132 EK |
3340 | binding keys are among the collected keys. If more than one unique referral is |
3341 | found, or only one referral is found, but the selection also contains a return | |
3342 | statement, we have a situation with an ambiguous return value, and an exception | |
3343 | is thrown. | |
9cc2cd59 EK |
3344 | |
3345 | %\todoin{Explain why we do not need to consider variables assigned inside | |
3346 | %local/anonymous classes. (The referenced variables need to be final and so | |
3347 | %on\ldots)} | |
41cde50e EK |
3348 | |
3349 | \subsection{The IllegalStatementsChecker} | |
3350 | This checker is designed to check for illegal statements. | |
3351 | ||
8fa89d14 EK |
3352 | Notice that labels in break and continue statements needs some special |
3353 | treatment. Since a label does not have any binding information, we have to | |
3354 | search upwards in the AST to find the \type{LabeledStatement} that corresponds | |
3355 | to the label from the break or continue statement, and check that it is | |
3356 | contained in the selection. If the break or continue statement does not have a | |
3357 | label attached to it, it is checked that its innermost enclosing loop or switch | |
3358 | statement (break statements only) also is contained in the selection. | |
08cbba3b | 3359 | |
8fa89d14 | 3360 | \todoin{Follow the development in the semantics section\ldots} |
41cde50e | 3361 | |
58467004 EK |
3362 | \chapter{Case Studies} |
3363 | ||
3364 | In this chapter I am going to present a few case studies. This is done to give | |
e36eade0 | 3365 | an impression of how the search-based \ExtractAndMoveMethod refactoring |
01d46361 EK |
3366 | performs when giving it a larger project to take on. I will try to answer where |
3367 | it lacks, in terms of completeness, as well as showing its effect on refactored | |
3368 | source code. | |
58467004 EK |
3369 | |
3370 | The first and primary case, is refactoring source code from the \name{Eclipse | |
3371 | JDT UI} project. The project is chosen because it is a real project, still in | |
3372 | development, with a large code base that is written by many different people | |
3373 | through several years. The code is installed in thousands of \name{Eclipse} | |
3374 | applications worldwide, and must be seen as a good representative for | |
3375 | professionally written Java source code. It is also the home for most of the JDT | |
3376 | refactoring code. | |
3377 | ||
3378 | For the second case, the \ExtractAndMoveMethod refactoring is fed the | |
3379 | \code{no.uio.ifi.refaktor} project. This is done as a variation of the | |
3380 | ``dogfooding'' methodology, where you use your own tools to do your job, also | |
3381 | referred to as ``eating your own dog | |
3382 | food''\citing{harrisonDogfooding2006}. | |
3383 | ||
3384 | \section{The tools} | |
6fabec9b EK |
3385 | For conducting these experiments, three tools are used. Two of the ``tools'' |
3386 | both uses Eclipse as their platform. The first is our own tool, | |
58467004 | 3387 | written to be able to run the \ExtractAndMoveMethod refactoring as a batch |
e36eade0 | 3388 | process, analyzing and refactoring many methods after each other. The second is |
c9488804 EK |
3389 | JUnit, that is used for running the projects own unit tests on the target code |
3390 | both before and after it is refactored. The last tool that is used is a code | |
3391 | quality management tool, called \name{SonarQube}. It can be used to perform | |
3392 | different tasks for assuring code quality, but we are only going to take | |
3393 | advantage of one of its main features, namely Quality profiles. | |
58467004 EK |
3394 | |
3395 | A quality profile is used to define a set of coding rules that a project is | |
3396 | supposed to comply with. Failure to following these rules will be recorded as | |
3397 | so-called ``issues'', marked as having one of several degrees of severities, | |
3398 | ranging from ``info'' to ``blocker'', where the latter one is the most severe. | |
3399 | The measurements done for these case studies are therefore not presented as | |
3400 | fine-grained software metrics results, but rather as the number of issues for | |
6fabec9b EK |
3401 | each defined rule. |
3402 | ||
bc7b5d67 | 3403 | In addition to the coding rules defined through quality profiles, \name{SonarQube} |
6fabec9b EK |
3404 | calculates the complexity of source code. The metric that is used is cyclomatic |
3405 | complexity, developed by Thomas J. McCabe in | |
3406 | 1976\citing{mccabeComplexity1976}. In this metric, functions have an initial | |
3407 | complexity of 1, and whenever the control flow of a function splits, the | |
3408 | complexity increases by | |
3409 | one\footnote{\url{http://docs.codehaus.org/display/SONAR/Metric+definitions}}. | |
bc7b5d67 | 3410 | \name{SonarQube} discriminates between functions and accessors. Accessors |
6fabec9b EK |
3411 | are methods that are recognized as setters or getters. Accessors are not counted |
3412 | in the complexity analysis. | |
3413 | ||
bc7b5d67 EK |
3414 | \section{The \name{SonarQube} quality profile} |
3415 | The quality profile that is used with \name{SonarQube} in these case studies has got | |
3416 | the name \name{IFI Refaktor Case Study} (version 6). The rules defined in the | |
3417 | profile are chosen because they are the available rules found in \name{SonarQube} that | |
3418 | measures complexity and coupling. Now follows a description of the rules in the | |
3419 | quality profile. The values that are set for these rules are listed in | |
6fabec9b EK |
3420 | \myref{tab:qualityProfile1}. |
3421 | ||
3422 | \begin{description} | |
3423 | \item[Avoid too complex class] is a rule that measures cyclomatic complexity | |
3424 | for every statement in the body of a class, except for setters and getter. | |
bc7b5d67 | 3425 | The threshold value set is its default value of 200. |
6fabec9b EK |
3426 | |
3427 | \item[Classes should not be coupled to too many other classes ] is a rule that | |
3428 | measures how many other classes a class depends upon. It does not count the | |
3429 | dependencies of nested classes. It is meant to promote the Single | |
3430 | Responsibility Principle. Although not explicitly stated, the rule's metric | |
3431 | resembles the \metr{Coupling between object classes} (CBO) metric that is | |
3432 | described by Chidamber and Kemerer in their article \tit{A Metrics Suite for | |
3433 | Object Oriented Design}\citing{metricsSuite1994}. The max value for the rule | |
3434 | is chosen on the background of an empirical study by Raed Shatnawi, that | |
3435 | concludes that the number 9 is the most useful threshold for the CBO | |
98660ec0 EK |
3436 | metric\citing{shatnawiQuantitative2010}. This study is also performed on |
3437 | Eclipse source code, so this threshold value should be particularly well | |
3438 | suited for the Eclipse JDT UI case in this chapter. | |
6fabec9b EK |
3439 | |
3440 | \item[Control flow statements \ldots{} should not be nested too deeply] is | |
3441 | a rule that is meant to counter ``Spaghetti code''. It measures the nesting | |
3442 | level of if, for, while, switch and try statements. The nesting levels start | |
bc7b5d67 | 3443 | at 1. The max value set is its default value of 3. |
6fabec9b EK |
3444 | |
3445 | \item[Methods should not be too complex] is a rule that measures cyclomatic | |
3446 | complexity the same way as the ``Avoid too complex class'' rule. The max | |
3447 | value used is 10, which ``seems like a reasonable, but not magical, upper | |
3448 | limit``\citing{mccabeComplexity1976}. | |
3449 | ||
3450 | \item[Methods should not have too many lines] is a rule that simply measures | |
bc7b5d67 EK |
3451 | the number of lines in methods. The threshold value of 20 is used for this |
3452 | metric. This is based on my own subjective opinions, as the default value of | |
3453 | 100 seems a bit too loose. | |
6fabec9b EK |
3454 | |
3455 | \item[NPath Complexity] is a rule that measures the number of possible | |
3456 | execution paths through a function. The value used is the default value of | |
3457 | 200, that seems like a recognized threshold for this metric. | |
3458 | ||
3459 | \item[Too many methods] is a rule that measures the number of methods in a | |
bc7b5d67 | 3460 | class. The threshold value used is the default value of 10. |
6fabec9b EK |
3461 | |
3462 | \end{description} | |
3463 | ||
3464 | ||
3465 | \begin{table}[htb] | |
bc7b5d67 | 3466 | \caption{The \name{IFI Refaktor Case Study} quality profile (version 6).} |
6fabec9b EK |
3467 | \label{tab:qualityProfile1} |
3468 | \centering | |
3469 | \begin{tabularx}{\textwidth}{@{}>{\bfseries}L{1.5}R{0.5}@{}} | |
3470 | \toprule | |
3471 | \textbf{Rule} & \textbf{Max value} \\ | |
3472 | \midrule | |
3473 | Avoid too complex class & 200 \\ | |
3474 | Classes should not be coupled to too many other classes (Single | |
3475 | Responsibility Principle) & 9 \\ | |
3476 | Control flow statements \ldots{} should not be nested too deeply & | |
3477 | 3 \\ | |
3478 | Methods should not be too complex & 10 \\ | |
3479 | Methods should not have too many lines & 20 \\ | |
3480 | NPath Complexity & 200 \\ | |
bc7b5d67 | 3481 | Too many methods & 10 \\ |
6fabec9b EK |
3482 | |
3483 | \bottomrule | |
3484 | \end{tabularx} | |
3485 | \end{table} | |
58467004 | 3486 | |
bc7b5d67 EK |
3487 | \section{The input} |
3488 | A precondition for the source code that is going to be the target for a series | |
3489 | of \ExtractAndMoveMethod refactorings, is that it is organized as an Eclipse | |
3490 | project. It is also assumed that the code is free from compilation errors. | |
3491 | ||
3492 | \section{The experiment} | |
3493 | For a given project, the first job that is done, is to refactor its source code. | |
3494 | The refactoring batch job produces three things: The refactored project, | |
3495 | statistics gathered during the execution of the series of refactorings, and an | |
3496 | error log describing any errors happening during this execution. See | |
3497 | \myref{sec:benchmarking} for more information about how the refactorings are | |
3498 | performed. | |
3499 | ||
3500 | After the refactoring process is done, the before- and after-code is analyzed | |
3501 | with \name{SonarQube}. The analysis results are then stored in a database and | |
3502 | displayed through a \name{SonarQube} server with a web interface.\todoin{How | |
3503 | long are these results going to be publicly available?} | |
3504 | ||
3505 | The before- and after-code is also tested with their own unit tests. This is | |
3506 | done to discover any changes in the semantic behavior of the refactored code, | |
3507 | within the limits of these tests. | |
3508 | ||
58467004 | 3509 | \section{Case 1: The Eclipse JDT UI project} |
98660ec0 | 3510 | This case is the ultimate test for our \ExtractAndMoveMethod refactoring. The |
e36eade0 | 3511 | target source code is massive. With its over 300,000 lines of code and over |
98660ec0 | 3512 | 25,000 methods, it is formidable task to perform automated changes on it. There |
e36eade0 | 3513 | should be plenty of situations where things can go wrong, and, as we shall see |
701c559a | 3514 | later, they do. |
98660ec0 EK |
3515 | |
3516 | I will start by presenting some statistics from the refactoring execution, | |
3517 | before I pick apart the \name{SonarQube} analysis and conclude by commenting on | |
701c559a EK |
3518 | the results from the unit tests. The configuration for the experiment is |
3519 | specified in \myref{tab:configurationCase1}. | |
98660ec0 | 3520 | |
701c559a EK |
3521 | \begin{table}[htb] |
3522 | \caption{Configuration for Case 1.} | |
3523 | \label{tab:configurationCase1} | |
3524 | \centering | |
3525 | \begin{tabularx}{\textwidth}{@{}>{\bfseries}L{0.67}L{1.33}@{}} | |
3526 | \toprule | |
3527 | \spancols{2}{Benchmark data} \\ | |
3528 | \midrule | |
3529 | Launch configuration & CaseStudy.launch \\ | |
3530 | Project & no.uio.ifi.refaktor.benchmark \\ | |
3531 | Repository & gitolite@git.uio.no:ifi-stolz-refaktor \\ | |
3532 | Commit & 43c16c04520746edd75f8dc2a1935781d3d9de6c \\ | |
3533 | \midrule | |
3534 | \spancols{2}{Input data} \\ | |
3535 | \midrule | |
3536 | Project & org.eclipse.jdt.ui \\ | |
3537 | Repository & git://git.eclipse.org/gitroot/jdt/eclipse.jdt.ui.git \\ | |
3538 | Commit & f218388fea6d4ec1da7ce22432726c244888bb6b \\ | |
3539 | Branch & R3\_8\_maintenance \\ | |
3540 | Tests suites & org.eclipse.jdt.ui.tests.AutomatedSuite, | |
3541 | org.eclipse.jdt.ui.tests.refactoring.all.\-AllAllRefactoringTests \\ | |
3542 | ||
3543 | \bottomrule | |
3544 | \end{tabularx} | |
3545 | \end{table} | |
98660ec0 EK |
3546 | \subsection{Statistics} |
3547 | The statistics gathered during the refactoring execution is presented in | |
3548 | \myref{tab:case1Statistics}. | |
3549 | ||
7fba220b EK |
3550 | \begin{table}[htb] |
3551 | \caption{Statistics after batch refactoring the Eclipse JDT UI project with | |
3552 | the \ExtractAndMoveMethod refactoring.} | |
3553 | \label{tab:case1Statistics} | |
3554 | \centering | |
3555 | \begin{tabularx}{\textwidth}{@{}>{\bfseries}L{1.5}R{0.5}@{}} | |
3556 | \toprule | |
3557 | \spancols{2}{Time used} \\ | |
3558 | \midrule | |
3559 | Total time & 98m38s \\ | |
3560 | Analysis time & 14m41s (15\%) \\ | |
3561 | Change time & 74m20s (75\%) \\ | |
3562 | Miscellaneous tasks & 9m37s (10\%) \\ | |
3563 | \midrule | |
3564 | \spancols{2}{Numbers of each type of entity analyzed} \\ | |
3565 | \midrule | |
3566 | Packages & 110 \\ | |
3567 | Compilation units & 2,097 \\ | |
3568 | Types & 3,152 \\ | |
3569 | Methods & 27,667 \\ | |
3570 | Text selections & 591,500 \\ | |
3571 | \midrule | |
3572 | \spancols{2}{Numbers for \ExtractAndMoveMethod refactoring candidates} \\ | |
3573 | \midrule | |
3574 | Methods chosen as candidates & 2,552 \\ | |
3575 | Methods NOT chosen as candidates & 25,115 \\ | |
3576 | Candidate selections (multiple per method) & 36,843 \\ | |
3577 | \midrule | |
3578 | \spancols{2}{\ExtractAndMoveMethod refactorings executed} \\ | |
3579 | \midrule | |
3580 | Fully executed & 2,469 \\ | |
3581 | Not fully executed & 83 \\ | |
3582 | Total attempts & 2,552 \\ | |
3583 | \midrule | |
3584 | \spancols{2}{Primitive refactorings executed} \\ | |
3585 | \spancols{2}{\small \ExtractMethod refactorings} \\ | |
3586 | \midrule | |
3587 | Performed & 2,483 \\ | |
3588 | Not performed & 69 \\ | |
3589 | Total attempts & 2,552 \\ | |
3590 | \midrule | |
3591 | \spancols{2}{\small \MoveMethod refactorings} \\ | |
3592 | \midrule | |
3593 | Performed & 2469 \\ | |
3594 | Not performed & 14 \\ | |
3595 | Total attempts & 2,483 \\ | |
3596 | ||
3597 | \bottomrule | |
3598 | \end{tabularx} | |
3599 | \end{table} | |
3600 | ||
98660ec0 | 3601 | \subsubsection{Execution time} |
e36eade0 | 3602 | I consider the total execution time of approximately 1.5 hours as being |
98660ec0 EK |
3603 | acceptable. It clearly makes the batch process unsuitable for doing any |
3604 | on-demand analysis or changes, but it is good enough for running periodic jobs, | |
3605 | like over-night analysis. | |
3606 | ||
3607 | As the statistics show, 75\% of the total time goes into making the actual code | |
3608 | changes. The time consumers are here the primitive \ExtractMethod and | |
3609 | \MoveMethod refactorings. Included in the change time is the parsing and | |
3610 | precondition checking done by the refactorings, as well as textual changes done | |
3611 | to files on disk. All this parsing and disk access is time-consuming, and | |
3612 | constitute a large part of the change time. | |
3613 | ||
3614 | In comparison, the pure analysis time, used to find suitable candidates, only | |
3615 | make up for 15\% of the total time consumed. This includes analyzing almost | |
3616 | 600,000 text selections, while the number of attempted executions of the | |
3617 | \ExtractAndMoveMethod refactoring are only about 2,500. So the number of | |
3618 | executed primitive refactorings are approximately 5,000. Assuming the time used | |
3619 | on miscellaneous tasks are used mostly for parsing source code for the analysis, | |
3620 | we can say that the time used for analyzing code is at most 25\% of the total | |
3621 | time. This means that for every primitive refactoring executed, we can analyze | |
3622 | around 360 text selections. So, with an average of about 21 text selections per | |
3623 | method, it is reasonable to say that we can analyze over 15 methods in the time | |
3624 | it takes to perform a primitive refactoring. | |
3625 | ||
3626 | \subsubsection{Refactoring candidates} | |
3627 | Out of the 27,667 methods that was analyzed, 2,552 methods contained selections | |
3628 | that was considered candidates for the \ExtractAndMoveMethod refactoring. This | |
3629 | is roughly 9\% off the methods in the project. These 9\% of the methods had on | |
3630 | average 14.4 text selections that was considered considered possible refactoring | |
3631 | candidates. | |
3632 | ||
3633 | \subsubsection{Executed refactorings} | |
3634 | 2,469 out of 2,552 attempts on executing the \ExtractAndMoveMethod refactoring | |
3635 | was successful, giving a success rate of 96.7\%. The failure rate of 3.3\% stem | |
3636 | from situations where the analysis finds a candidate selection, but the change | |
3637 | execution fails. This failure could be an exception that was thrown, and the | |
3638 | refactoring aborts. It could also be the precondition checking for one of the | |
3639 | primitive refactorings that gives us an error status, meaning that if the | |
3640 | refactoring proceeds, the code will contain compilation errors afterwards, | |
3641 | forcing the composite refactoring to abort. This means that if the | |
3642 | \ExtractMethod refactoring fails, no attempt is done for the \MoveMethod | |
3643 | refactoring. \todo{Redundant information? Put in benchmark chapter?} | |
3644 | ||
3645 | Out of the 2,552 \ExtractMethod refactorings that was attempted executed, 69 of | |
3646 | them failed. This give a failure rate of 2.7\% for the primitive refactoring. In | |
3647 | comparison, the \MoveMethod refactoring had a failure rate of 0.6 \% of the | |
3648 | 2,483 attempts on the refactoring. | |
3649 | ||
3650 | \subsection{\name{SonarQube} analysis} | |
701c559a EK |
3651 | Results from the \name{SonarQube} analysis is shown in |
3652 | \myref{tab:case1ResultsProfile1}. | |
98660ec0 | 3653 | |
c9488804 | 3654 | \begin{table}[htb] |
bc7b5d67 EK |
3655 | \caption{Results for analyzing the Eclipse JDT UI project, before and after |
3656 | the refactoring, with \name{SonarQube} and the \name{IFI Refaktor Case Study} | |
3657 | quality profile. (Bold numbers are better.)} | |
c9488804 EK |
3658 | \label{tab:case1ResultsProfile1} |
3659 | \centering | |
c9488804 EK |
3660 | \begin{tabularx}{\textwidth}{@{}>{\bfseries}L{1.5}R{0.25}R{0.25}@{}} |
3661 | \toprule | |
bc7b5d67 | 3662 | \textnormal{Number of issues for each rule} & Before & After \\ |
c9488804 EK |
3663 | \midrule |
3664 | Avoid too complex class & 81 & \textbf{79} \\ | |
3665 | Classes should not be coupled to too many other classes (Single | |
3666 | Responsibility Principle) & \textbf{1,098} & 1,199 \\ | |
3667 | Control flow statements \ldots{} should not be nested too deeply & 1,375 & | |
3668 | \textbf{1,285} \\ | |
3669 | Methods should not be too complex & 1,518 & \textbf{1,452} \\ | |
3670 | Methods should not have too many lines & 3,396 & \textbf{3,291} \\ | |
3671 | NPath Complexity & 348 & \textbf{329} \\ | |
bc7b5d67 | 3672 | Too many methods & \textbf{454} & 520 \\ |
6fabec9b | 3673 | \midrule |
bc7b5d67 | 3674 | Total number of issues & 8,270 & \textbf{8,155} \\ |
6fabec9b EK |
3675 | \midrule |
3676 | \midrule | |
bc7b5d67 | 3677 | \spancols{3}{Complexity} \\ |
6fabec9b EK |
3678 | \midrule |
3679 | Per function & 3.6 & \textbf{3.3} \\ | |
3680 | Per class & \textbf{29.5} & 30.4 \\ | |
3681 | Per file & \textbf{44.0} & 45.3 \\ | |
3682 | \midrule | |
3683 | Total complexity & \textbf{84,765} & 87,257 \\ | |
3684 | \midrule | |
3685 | \midrule | |
bc7b5d67 | 3686 | \spancols{3}{Numbers of each type of entity analyzed} \\ |
6fabec9b EK |
3687 | \midrule |
3688 | Files & 1,926 & 1,926 \\ | |
3689 | Classes & 2,875 & 2,875 \\ | |
3690 | Functions & 23,744 & 26,332 \\ | |
3691 | Accessors & 1,296 & 1,019 \\ | |
3692 | Statements & 162,768 & 165,145 \\ | |
3693 | Lines of code & 320,941 & 329,112 \\ | |
3694 | \midrule | |
bc7b5d67 | 3695 | Technical debt (in days) & \textbf{1,003.4} & 1,032.7 \\ |
c9488804 EK |
3696 | \bottomrule |
3697 | \end{tabularx} | |
3698 | \end{table} | |
6fabec9b | 3699 | |
701c559a | 3700 | \subsubsection{Diversity in the number of entities analyzed} |
e36eade0 | 3701 | The analysis performed by \name{SonarQube} is reporting fewer methods than found |
701c559a EK |
3702 | by the pre-refactoring analysis. \name{SonarQube} discriminates between |
3703 | functions (methods) and accessors, so the 1,296 accessors play a part in this | |
3704 | calculation. \name{SonarQube} also has the same definition as our plugin when | |
3705 | it comes to how a class is defined. Therefore is seems like \name{SonarQube} | |
3706 | misses 277 classes that our plugin handles. This can explain why the {SonarQube} | |
3707 | report differs from our numbers by approximately 2,500 methods, | |
3708 | ||
7fba220b EK |
3709 | \subsubsection{Complexity} |
3710 | On all complexity rules that works on the method level, the number of issues | |
3711 | decreases with between 3.1\% and 6.5\% from before to after the refactoring. The | |
3712 | average complexity of a method decreases from 3.6 to 3.3, which is an | |
3713 | improvement of about 8.3\%. So, on the method level, the refactoring must be | |
3714 | said to have a slightly positive impact. | |
3715 | ||
3716 | The improvement in complexity on the method level is somewhat traded for | |
3717 | complexity on the class level. The complexity per class metric is worsen by 3\% | |
3718 | from before to after. The issues for the ``Too many methods'' rule also | |
3719 | increases by 14.5\%. These numbers indicate that the refactoring makes quite a | |
3720 | lot of the classes a little more complex overall. This is the expected outcome, | |
3721 | since the \ExtractAndMoveMethod refactoring introduces almost 2,500 new methods | |
3722 | into the project. | |
3723 | ||
3724 | The only number that can save the refactoring's impact on complexity on the | |
3725 | class level, is the ``Avoid too complex class'' rule. It improves with 2.5\%, | |
3726 | thus indicating that the complexity is moderately better distributed between the | |
3727 | classes after the refactoring than before. | |
3728 | ||
3729 | \subsubsection{Coupling} | |
3730 | One of the hopes when starting this project, was to be able to make a | |
3731 | refactoring that could lower the coupling between classes. Better complexity at | |
39884128 | 3732 | the method level is a not very unexpected byproduct of dividing methods into |
7fba220b EK |
3733 | smaller parts. Lowering the coupling on the other hand, is a far greater task. |
3734 | This is also reflected in the results for the only coupling rule defined in the | |
3735 | \name{SonarQube} quality profile, namely the ``Classes should not be coupled to | |
3736 | too many | |
3737 | other classes (Single Responsibility Principle)'' rule. | |
3738 | ||
3739 | The number of issues for the coupling rule is 1,098 before the refactoring, and | |
3740 | 1,199 afterwards. This is an increase in issues of 9.2\%, and a blow for this | |
3741 | project. These numbers can be interpreted two ways. The first possibility is | |
3742 | that our assumptions are wrong, and that increasing indirection does not | |
3743 | decrease coupling between classes. The other possibility is that our analysis | |
3744 | and choices of candidate text selections are not good enough. I vote for the | |
e36eade0 | 3745 | second possibility. (Voting against the public opinion may also be a little |
7fba220b EK |
3746 | bold.) |
3747 | ||
3748 | What probably happens is, that many of the times the \ExtractAndMoveMethod | |
3749 | refactoring is performed, the \MoveMethod refactoring ``drags'' with it | |
3750 | references to classes that are unknown to the method destination. If it happens | |
3751 | to be so lucky that it removes a dependency from one class, it might as well | |
3752 | introduce three new dependencies to another class. In those situations that a | |
3753 | class does not know about the originating class of a moved method, the | |
078b1e4a | 3754 | \MoveMethod refactoring most certainly will introduce a dependency. This is |
39884128 | 3755 | because there is a |
078b1e4a | 3756 | bug\footnote{\url{https://bugs.eclipse.org/bugs/show\_bug.cgi?id=228635}} in the |
39884128 EK |
3757 | refactoring, making it pass an instance of the originating class as a reference |
3758 | to the moved method, regardless of whether the reference is used in the method | |
3759 | body or not. | |
7fba220b EK |
3760 | |
3761 | There is also the possibility that the heuristics used to find candidate text | |
3762 | selections are not good enough, they most certainly are not. I wish I had more | |
3763 | time to fine-tune them, and to complete the analysis part of the project, but | |
3764 | this is simply not the case. This becomes even clearer when analyzing the unit | |
3765 | test results for the after-code. | |
3766 | ||
3767 | \subsubsection{Totals} | |
3768 | On the bright side, the total number of issues is lower after the refactoring | |
3769 | than it was before. Before the refactoring, the total number of issues is | |
3770 | 8,270, and after it is 8,155. An improvement of only 1.4\%. | |
3771 | ||
3772 | Then \name{SonarQube} tells me that the total complexity has increased by | |
3773 | 2.9\%, and that the (more questionable) ``technical debt'' has increased from | |
3774 | 1,003.4 to 1,032.7 days, also a deterioration of 2.9\%. Although these numbers | |
3775 | are similar, no correlation has been found between them. | |
3776 | ||
3777 | \subsection{Unit tests} | |
39884128 EK |
3778 | The tests that have been run for the \name{Eclipse JDT UI} project, are the |
3779 | tests in the test suites specified as the main test suites on the JDT UI wiki | |
3780 | page on how to contribute to the | |
3781 | project\footnote{\url{https://wiki.eclipse.org/JDT\_UI/How\_to\_Contribute\#Unit\_Testing}}. | |
3782 | ||
3783 | \subsubsection{Before the refactoring} | |
3784 | Running the tests for the before-code of Eclipse JDT UI yielded 4 errors and 3 | |
3785 | failures for the \type{AutomatedSuite} test suite (2,007 test cases), and 2 | |
3786 | errors and | |
3787 | 3 failures for the \type{AllAllRefactoringTests} test suite (3,816 test cases). | |
3788 | ||
3789 | \subsubsection{After the refactoring} | |
3790 | The test results for the after-code of the Eclipse JDT UI project is another | |
01d46361 EK |
3791 | story. The reason for this is that during the setup for the unit tests, Eclipse |
3792 | now reports that the project contains 322 fatal errors, and a lot of errors that | |
3793 | probably follows from these. This is another blow for this master's project. | |
39884128 EK |
3794 | |
3795 | It has now been shown that the \ExtractAndMoveMethod refactoring, in its current | |
3796 | state, produces code that does not compile. Had these errors originated from | |
3797 | only one bug, it would not have been much of a problem, but this is not the | |
e36eade0 | 3798 | case. By only looking at some random compilation problems in the refactored code, |
01d46361 EK |
3799 | I came up with at least four different bugs \todo{write bug reports?} that |
3800 | caused those problems. I then stopped looking for more, since some of the bugs | |
3801 | would take more time to fix than I could justify using on them at this point. | |
39884128 EK |
3802 | |
3803 | The only thing that can be said in my defence, is that all the compilation | |
3804 | errors could have been avoided if the type of situations that causes them was | |
3805 | properly handled by the primitive refactorings, that again are supported by the | |
e36eade0 | 3806 | Eclipse JDT UI project. All of the four randomly found bugs that I mentioned |
39884128 EK |
3807 | before, are also weaknesses of the \MoveMethod refactoring. If the primitive |
3808 | refactorings had detected the up-coming errors | |
3809 | in their precondition checking phase, the refactorings would have been aborted, | |
3810 | since this is how the \ExtractAndMoveMethod refactoring handles such situations. | |
3811 | ||
3812 | Of course, taking all possible situations into account is an immense task. This | |
3813 | is one of the reasons for the failure. A complete analysis is too big of a task | |
3814 | for this master's project to handle. Looking at it now, this comes as no | |
3815 | surprise, since the task is obviously also too big for the creators of the | |
3816 | primitive \MoveMethod refactoring. This shows that the underlying primitive | |
3817 | refactorings are not complete enough to be fully relied upon for avoiding | |
3818 | compilation errors. | |
3819 | ||
3820 | Considering all these problems, it is difficult to know how to interpret the | |
3821 | unit test results from after refactoring the Eclipse JDT UI. The | |
3822 | \type{AutomatedSuite} reported 565 errors and 5 failures. Three of the failures | |
3823 | were the same as reported before the refactoring took place, so two of them are | |
3824 | new. For these two cases it is not immediately apparent what makes them behave | |
3825 | differently. The program is so complex that to analyze it to find this out, we | |
3826 | might need more powerful methods than just manually analyzing its source code. | |
3827 | This is somewhat characteristic for imperative programming: The programs are | |
3828 | often hard to analyze and understand. | |
3829 | ||
3830 | For the \type{AllAllRefactoringTests} test suite, the three failures are gone, | |
3831 | but the two errors have grown to 2,257 errors. I will not try to analyze those | |
3832 | errors. | |
3833 | ||
3834 | What I can say, is that it is likely that the \ExtractAndMoveMethod refactoring | |
3835 | has introduced some unintended behavioral changes. Let us say that the | |
3836 | refactoring introduces at least two behavior-altering changes for every 2,500 | |
3837 | executions. More than that is difficult to say about the behavior-preserving | |
3838 | properties of the \ExtractAndMoveMethod refactoring, at this point. | |
3839 | ||
3840 | \subsection{Conclusions} | |
3841 | After automatically analyzing and executing the \ExtractAndMoveMethod | |
3842 | refactoring for all the methods in the Eclipse JDT UI project, the results does | |
3843 | not look that promising. For this case, the refactoring seems almost unusable as | |
3844 | it is now. The error rate and measurements done tells us this. | |
3845 | ||
3846 | The refactoring makes the code a little less complex at the method level. But | |
3847 | this is merely a side effect of extracting methods, and holds little scientific | |
3848 | value. When it comes to the overall complexity, it is increased, although it is | |
3849 | slightly better spread among the classes. | |
3850 | ||
3851 | The analysis done before the \ExtractAndMoveMethod refactoring, is currently not | |
3852 | complete enough to make the refactoring useful. It introduces too many errors in | |
3853 | the code, and the code may change it's behavior. It also remains to prove that | |
3854 | large scale refactoring with it can decrease coupling between classes. A better | |
3855 | analysis may prove this, but in its present state, the opposite is the fact. The | |
3856 | coupling measurements done by \name{SonarQube} shows this. | |
3857 | ||
3858 | On the bright side, the performance of the refactoring process is not that bad. | |
3859 | It shows that it is possible to make a tool the way we do, if we can make the | |
3860 | tool do anything useful. As long as the analysis phase is not going to involve | |
3861 | anything that uses to much disk access, a lot of analysis can be done in a | |
3862 | reasonable amount of time. | |
3863 | ||
3864 | The time used on performing the actual changes excludes a trial and error | |
3865 | approach with the tools used in this master's project. In a trial and error | |
3866 | approach, you could for instance be using the primitive refactorings used in | |
3867 | this project to refactor code, and only then make decisions based on the effect, | |
3868 | possibly shown by traditional software metrics. The problem with the approach | |
3869 | taken in this project, compared to a trial and error approach, is that using | |
3870 | heuristics beforehand is much more complicated. But on the other hand, a trial | |
3871 | and error approach would still need to face the challenges of producing code | |
3872 | that does compile without errors. If using refactorings that could produce | |
3873 | in-memory changes, a trial and error approach could be made more efficient. | |
078b1e4a EK |
3874 | |
3875 | \section{Case 2: The \type{no.uio.ifi.refaktor} project} | |
01d46361 | 3876 | In this case we will see a form of the ``dogfooding'' methodology used, when |
e36eade0 | 3877 | refactoring our own \type{no.uio.ifi.refaktor} project with the |
078b1e4a EK |
3878 | \ExtractAndMoveMethod refactoring. |
3879 | ||
3880 | In this case I will try to point out some differences from case 1, and how they | |
3881 | impact the execution of the benchmark. The refaktor project is 39 times smaller | |
3882 | than the Eclipse JDT UI project, measured in lines of code. This will make | |
3883 | things a bit more transparent. It will therefore be interesting to see if this | |
3884 | case can shed light on any aspect of our project that was lost in the larger | |
3885 | case 1. | |
3886 | ||
eb913f75 EK |
3887 | The configuration for the experiment is specified in |
3888 | \myref{tab:configurationCase2}. | |
3889 | ||
3890 | \begin{table}[htb] | |
3891 | \caption{Configuration for Case 2.} | |
3892 | \label{tab:configurationCase2} | |
3893 | \centering | |
3894 | \begin{tabularx}{\textwidth}{@{}>{\bfseries}L{0.67}L{1.33}@{}} | |
3895 | \toprule | |
3896 | \spancols{2}{Benchmark data} \\ | |
3897 | \midrule | |
3898 | Launch configuration & CaseStudyDogfooding.launch \\ | |
3899 | Project & no.uio.ifi.refaktor.benchmark \\ | |
3900 | Repository & gitolite@git.uio.no:ifi-stolz-refaktor \\ | |
3901 | Commit & 43c16c04520746edd75f8dc2a1935781d3d9de6c \\ | |
3902 | \midrule | |
3903 | \spancols{2}{Input data} \\ | |
3904 | \midrule | |
3905 | Project & no.uio.ifi.refaktor \\ | |
3906 | Repository & gitolite@git.uio.no:ifi-stolz-refaktor \\ | |
3907 | Commit & 43c16c04520746edd75f8dc2a1935781d3d9de6c \\ | |
3908 | Branch & master \\ | |
e36eade0 | 3909 | Test configuration & no.uio.ifi.refaktor.tests/ExtractTest.launch \\ |
eb913f75 EK |
3910 | \bottomrule |
3911 | \end{tabularx} | |
3912 | \end{table} | |
078b1e4a | 3913 | |
eb913f75 EK |
3914 | \subsection{Statistics} |
3915 | The statistics gathered during the refactoring execution is presented in | |
3916 | \myref{tab:case2Statistics}. | |
3917 | ||
3918 | \begin{table}[htb] | |
3919 | \caption{Statistics after batch refactoring the \type{no.uio.ifi.refaktor} | |
3920 | project with the \ExtractAndMoveMethod refactoring.} | |
3921 | \label{tab:case2Statistics} | |
3922 | \centering | |
3923 | \begin{tabularx}{\textwidth}{@{}>{\bfseries}L{1.5}R{0.5}@{}} | |
3924 | \toprule | |
3925 | \spancols{2}{Time used} \\ | |
3926 | \midrule | |
3927 | Total time & 1m15s \\ | |
3928 | Analysis time & 0m18s (24\%) \\ | |
3929 | Change time & 0m47s (63\%) \\ | |
3930 | Miscellaneous tasks & 0m10s (14\%) \\ | |
3931 | \midrule | |
3932 | \spancols{2}{Numbers of each type of entity analyzed} \\ | |
3933 | \midrule | |
3934 | Packages & 33 \\ | |
3935 | Compilation units & 154 \\ | |
3936 | Types & 168 \\ | |
3937 | Methods & 1,070 \\ | |
3938 | Text selections & 8,609 \\ | |
3939 | \midrule | |
3940 | \spancols{2}{Numbers for \ExtractAndMoveMethod refactoring candidates} \\ | |
3941 | \midrule | |
3942 | Methods chosen as candidates & 58 \\ | |
3943 | Methods NOT chosen as candidates & 1,012 \\ | |
3944 | Candidate selections (multiple per method) & 227 \\ | |
3945 | \midrule | |
3946 | \spancols{2}{\ExtractAndMoveMethod refactorings executed} \\ | |
3947 | \midrule | |
3948 | Fully executed & 53 \\ | |
3949 | Not fully executed & 5 \\ | |
3950 | Total attempts & 58 \\ | |
3951 | \midrule | |
3952 | \spancols{2}{Primitive refactorings executed} \\ | |
3953 | \spancols{2}{\small \ExtractMethod refactorings} \\ | |
3954 | \midrule | |
3955 | Performed & 56 \\ | |
3956 | Not performed & 2 \\ | |
3957 | Total attempts & 58 \\ | |
3958 | \midrule | |
3959 | \spancols{2}{\small \MoveMethod refactorings} \\ | |
3960 | \midrule | |
3961 | Performed & 53 \\ | |
3962 | Not performed & 3 \\ | |
3963 | Total attempts & 56 \\ | |
3964 | ||
3965 | \bottomrule | |
3966 | \end{tabularx} | |
3967 | \end{table} | |
3968 | ||
078b1e4a EK |
3969 | \subsubsection{Differences} |
3970 | There are some differences between the two projects that make them a little | |
3971 | difficult to compare by performance. | |
3972 | ||
3973 | \paragraph{Different complexity.} | |
3974 | Although the JDT UI project is 39 times greater than the refaktor project in | |
3975 | terms of lines of code, it is only about 26 times its size measured in numbers | |
3976 | of methods. This means that the methods in the refaktor project are smaller in | |
3977 | average than in the JDT project. This is also reflected in the \name{SonarQube} | |
3978 | report, where the complexity per method for the JDT project is 3.6, while the | |
3979 | refaktor project has a complexity per method of 2.1. | |
3980 | ||
3981 | \paragraph{Number of selections per method.} | |
3982 | The analysis for the JDT project processed 21 text selections per method in | |
3983 | average. This number for the refaktor project is only 8 selections per method | |
3984 | analyzed. This is a direct consequence of smaller methods. | |
3985 | ||
3986 | \paragraph{Different candidates to methods ratio.} | |
3987 | The differences in how the projects are factored are also reflected in the | |
3988 | ratios for how many methods that are chosen as candidates compared to the total | |
3989 | number of methods analyzed. For the JDT project, 9\% of the methods was | |
3990 | considered to be candidates, while for the refaktor project, only 5\% of the | |
3991 | methods was chosen. | |
3992 | ||
3993 | \paragraph{The average number of possible candidate selection.} | |
e36eade0 | 3994 | For the methods that are chosen as candidates, the average number of possible |
078b1e4a EK |
3995 | candidate selections for these methods differ quite much. For the JDT project, |
3996 | the number of possible candidate selections for these methods were 14.44 | |
3997 | selections per method, while the candidate methods in the refaktor project had | |
3998 | only 3.91 candidate selections to choose from, in average. | |
3999 | ||
eb913f75 | 4000 | \subsubsection{Execution time} |
078b1e4a | 4001 | The differences in complexity, and the different candidate methods to total |
e36eade0 | 4002 | number of methods ratios, is shown in the distributions of the execution times. |
078b1e4a EK |
4003 | For the JDT project, 75\% of the total time was used on the actual changes, |
4004 | while for the refaktor project, this number was only 63\%. | |
eb913f75 | 4005 | |
078b1e4a EK |
4006 | For the JDT project, the benchmark used on average 0.21 seconds per method in |
4007 | the project, while for the refaktor project it used only 0.07 seconds per | |
4008 | method. So the process used 3 times as much time per method for the JDT project | |
4009 | than for the refaktor project. | |
4010 | ||
4011 | While the JDT project is 39 times larger than the refaktor project measured in | |
4012 | lines of code, the benchmark used about 79 times as long time on it than for the | |
4013 | refaktor project. Relatively, this is about twice as long. | |
4014 | ||
4015 | Since the details of these execution times are not that relevant to this | |
4016 | master's project, only their magnitude, I will leave them here. | |
eb913f75 EK |
4017 | |
4018 | \subsubsection{Executed refactorings} | |
078b1e4a EK |
4019 | For the composite \ExtractAndMoveMethod refactoring performed in case 2, 53 |
4020 | successful attempts out of 58 gives a success rate of 91.4\%. This is 5.3 | |
e36eade0 | 4021 | percentage points worse than for case 1. |
eb913f75 EK |
4022 | |
4023 | \subsection{\name{SonarQube} analysis} | |
4024 | Results from the \name{SonarQube} analysis is shown in | |
4025 | \myref{tab:case2ResultsProfile1}. | |
4026 | ||
078b1e4a EK |
4027 | Not much is to be said about these results. The trends in complexity and |
4028 | coupling are the same. We end up a little worse after the refactoring process | |
4029 | than before. | |
4030 | ||
eb913f75 EK |
4031 | \begin{table}[htb] |
4032 | \caption{Results for analyzing the \var{no.uio.ifi.refaktor} project, before | |
4033 | and after the refactoring, with \name{SonarQube} and the \name{IFI Refaktor | |
4034 | Case Study} quality profile. (Bold numbers are better.)} | |
4035 | \label{tab:case2ResultsProfile1} | |
4036 | \centering | |
4037 | \begin{tabularx}{\textwidth}{@{}>{\bfseries}L{1.5}R{0.25}R{0.25}@{}} | |
4038 | \toprule | |
4039 | \textnormal{Number of issues for each rule} & Before & After \\ | |
4040 | \midrule | |
4041 | Avoid too complex class & 1 & 1 \\ | |
4042 | Classes should not be coupled to too many other classes (Single | |
4043 | Responsibility Principle) & \textbf{29} & 34 \\ | |
4044 | Control flow statements \ldots{} should not be nested too deeply & 24 & | |
4045 | \textbf{21} \\ | |
4046 | Methods should not be too complex & 17 & \textbf{15} \\ | |
4047 | Methods should not have too many lines & 41 & \textbf{40} \\ | |
4048 | NPath Complexity & 3 & 3 \\ | |
4049 | Too many methods & \textbf{13} & 15 \\ | |
4050 | \midrule | |
4051 | Total number of issues & \textbf{128} & 129 \\ | |
4052 | \midrule | |
4053 | \midrule | |
4054 | \spancols{3}{Complexity} \\ | |
4055 | \midrule | |
4056 | Per function & 2.1 & 2.1 \\ | |
4057 | Per class & \textbf{12.5} & 12.9 \\ | |
4058 | Per file & \textbf{13.8} & 14.2 \\ | |
4059 | \midrule | |
4060 | Total complexity & \textbf{2,089} & 2,148 \\ | |
4061 | \midrule | |
4062 | \midrule | |
4063 | \spancols{3}{Numbers of each type of entity analyzed} \\ | |
4064 | \midrule | |
4065 | Files & 151 & 151 \\ | |
4066 | Classes & 167 & 167 \\ | |
4067 | Functions & 987 & 1,045 \\ | |
4068 | Accessors & 35 & 30 \\ | |
4069 | Statements & 3,355 & 3,416 \\ | |
078b1e4a | 4070 | Lines of code & 8,238 & 8,460 \\ |
eb913f75 EK |
4071 | \midrule |
4072 | Technical debt (in days) & \textbf{19.0} & 20.7 \\ | |
4073 | \bottomrule | |
4074 | \end{tabularx} | |
4075 | \end{table} | |
4076 | ||
078b1e4a EK |
4077 | \subsection{Unit tests} |
4078 | The tests used for this case are the same that has been developed throughout the | |
4079 | master's project. | |
4080 | ||
4081 | The code that was refactored for this case suffered from some of the problems | |
4082 | discovered in case 1. This means that the after-code for case 2 also contained | |
4083 | compilation errors, but they were not as many. The code contained only 6 errors | |
4084 | that made the code not compile. | |
4085 | ||
4086 | All of the errors made, originated from the same bug. It is a bug that happens | |
4087 | in situation where a class instance creation is moved from between packages, and | |
4088 | the class for the instance is package-private. The \MoveMethod refactoring does | |
e36eade0 | 4089 | not detect that there will be a visibility problem, and neither does it promote |
078b1e4a EK |
4090 | the package-private class to be public. |
4091 | ||
4092 | Since the errors was easy to fix manually, I corrected them and ran the unit | |
4093 | tests as planned. Before the refactoring, all tests passed. All tests also | |
4094 | passed after the refactoring, with the six error corrections. Since the | |
4095 | corrections done is not of a kind that could make the behavior of the program | |
4096 | change, it is likely that the refactorings done to the | |
4097 | \type{no.uio.ifi.refaktor} project did not change its behavior. This is also | |
4098 | supported by the informal experiment presented next. | |
4099 | ||
4100 | \subsection{An informal experiment} | |
e36eade0 | 4101 | To complete the task of ``eating my own dog food'', I conducted an informal |
078b1e4a EK |
4102 | experiment where I used the refactored version of the \type{no.uio.ifi.refaktor} |
4103 | project, with the corrections, to again refaktor ``itself''. | |
4104 | ||
4105 | The experiment produced code containing the same six errors as after the | |
4106 | previous experiment. I also compared the after-code from the two experiments | |
4107 | with a diff-tool. The only differences found was different method names. This is | |
4108 | expected, since the method names are randomly generated by the | |
4109 | \ExtractAndMoveMethod refactoring. | |
eb913f75 | 4110 | |
078b1e4a EK |
4111 | The outcome of this simple experiment makes me more confident that the |
4112 | \ExtractAndMoveMethod refactoring made only behavior-preserving changes to the | |
4113 | \type{no.uio.ifi.refaktor} project, apart from the compilation errors. | |
eb913f75 | 4114 | |
078b1e4a EK |
4115 | \subsection{Conclusions} |
4116 | The differences in complexity between the Eclipse JDT UI project and the | |
4117 | \type{no.uio.ifi.refaktor} project, clearly influenced the differences in their | |
4118 | execution times. This is mostly because fewer of the methods were chosen to be | |
4119 | refactored for the refaktor project than for the JDT project. What this makes | |
e36eade0 | 4120 | difficult, is to know if there are any severe performance penalties associated |
078b1e4a | 4121 | with refactoring on a large project compared to a small one. |
eb913f75 | 4122 | |
078b1e4a EK |
4123 | The trends in the \name{SonarQube} analysis are the same for this case as for |
4124 | the previous one. This gives more confidence in the these results. | |
eb913f75 | 4125 | |
078b1e4a EK |
4126 | By refactoring our own code and using it again to refactor our code, we showed |
4127 | that it is possible to write an automated composite refactoring that works for | |
4128 | many cases. That it probably did not alter the behavior of a smaller project | |
4129 | shows us nothing more than that though, and might just be a coincidence. | |
eb913f75 | 4130 | |
078b1e4a | 4131 | \section{Summary} |
01d46361 | 4132 | \todoin{Write? Or wrap up in final conclusions?} |
c9488804 | 4133 | |
bc7b5d67 | 4134 | \chapter{Benchmarking}\label{sec:benchmarking} |
60065669 | 4135 | This part of the master's project is located in the \name{Eclipse} project |
356782a0 EK |
4136 | \code{no.uio.ifi.refaktor.benchmark}. The purpose of it is to run the equivalent |
4137 | of the \type{SearchBasedExtractAndMoveMethodChanger} | |
4138 | \see{searchBasedExtractAndMoveMethodChanger} over a larger software project, | |
3ab3e132 | 4139 | both to test its robustness but also its effect on different software metrics. |
356782a0 EK |
4140 | |
4141 | \section{The benchmark setup} | |
fe0a4c48 | 4142 | The benchmark itself is set up as a \name{JUnit} test case. This is a convenient |
7940aed1 EK |
4143 | setup, and utilizes the \name{JUnit Plugin Test Launcher}. This provides us with |
4144 | a fully functional \name{Eclipse} workbench. Most importantly, this gives us | |
4145 | access to the Java Model of \name{Eclipse} \see{javaModel}. | |
356782a0 EK |
4146 | |
4147 | \subsection{The ProjectImporter} | |
4148 | The Java project that is going to be used as the data for the benchmark, must be | |
4149 | imported into the JUnit workspace. This is done by the | |
4150 | \typewithref{no.uio.ifi.refaktor.benchmark}{ProjectImporter}. The importer | |
4151 | require the absolute path to the project description file. It is named | |
4152 | \code{.project} and is located at the root of the project directory. | |
4153 | ||
4154 | The project description is loaded to find the name of the project to be | |
4155 | imported. The project that shall be the destination for the import is created in | |
4156 | the workspace, on the base of the name from the description. Then an import | |
4157 | operation is created, based on both the source and destination information. The | |
4158 | import operation is run to perform the import. | |
4159 | ||
4160 | I have found no simple API call to accomplish what the importer does, which | |
fe0a4c48 EK |
4161 | tells me that it may not be too many people performing this particular action. |
4162 | The solution to the problem was found on \name{Stack | |
356782a0 | 4163 | Overflow}\footnote{\url{https://stackoverflow.com/questions/12401297}}. It |
3ab3e132 | 4164 | contains enough dirty details to be considered inconvenient to use, if not |
356782a0 EK |
4165 | wrapping it in a class like my \type{ProjectImporter}. One would probably have |
4166 | to delve into the source code for the import wizard to find out how the import | |
4167 | operation works, if no one had already done it. | |
4168 | ||
8fe94c0b EK |
4169 | \section{Statistics} |
4170 | Statistics for the analysis and changes is captured by the | |
4171 | \typewithref{no.uio.ifi.refaktor.aspects}{StatisticsAspect}. This an | |
fe0a4c48 | 4172 | \emph{aspect} written in \name{AspectJ}. |
8fe94c0b EK |
4173 | |
4174 | \subsection{AspectJ} | |
fe0a4c48 | 4175 | \name{AspectJ}\footnote{\url{http://eclipse.org/aspectj/}} is an extension to |
8fe94c0b EK |
4176 | the Java language, and facilitates combining aspect-oriented programming with |
4177 | the object-oriented programming in Java. | |
4178 | ||
4179 | Aspect-oriented programming is a programming paradigm that is meant to isolate | |
4180 | so-called \emph{cross-cutting concerns} into their own modules. These | |
dd73ba39 EK |
4181 | cross-cutting concerns are functionalities that spans over multiple classes, but |
4182 | may not belong naturally in any of them. It can be functionality that does not | |
4183 | concern the business logic of an application, and thus may be a burden when | |
4184 | entangled with parts of the source code it does not really belong. Examples | |
4185 | include logging, debugging, optimization and security. | |
8fe94c0b | 4186 | |
416b6888 EK |
4187 | Aspects are interacting with other modules by defining advices. The concept of |
4188 | an \emph{advice} is known from both aspect-oriented and functional | |
4189 | programming\citing{wikiAdvice2014}. It is a function that modifies another | |
4190 | function when the latter is run. An advice in AspectJ is somewhat similar to a | |
4191 | method in Java. It is meant to alter the behavior of other methods, and contains | |
4192 | a body that is executed when it is applied. | |
8fe94c0b EK |
4193 | |
4194 | An advice can be applied at a defined \emph{pointcut}. A pointcut picks out one | |
4195 | or more \emph{join points}. A join point is a well-defined point in the | |
4196 | execution of a program. It can occur when calling a method defined for a | |
4197 | particular class, when calling all methods with the same name, | |
4198 | accessing/assigning to a particular field of a given class and so on. An advice | |
dd73ba39 EK |
4199 | can be declared to run both before, after returning from a pointcut, when there |
4200 | is thrown an exception in the pointcut or after the pointcut either returns or | |
4201 | throws an exception. In addition to picking out join points, a pointcut can | |
4202 | also bind variables from its context, so they can be accessed in the body of an | |
4203 | advice. An example of a pointcut and an advice is found in | |
4204 | \myref{lst:aspectjExample}. | |
8fe94c0b EK |
4205 | |
4206 | \begin{listing}[h] | |
c8088eec | 4207 | \begin{minted}{aspectj} |
8fe94c0b EK |
4208 | pointcut methodAnalyze( |
4209 | SearchBasedExtractAndMoveMethodAnalyzer analyzer) : | |
4210 | call(* SearchBasedExtractAndMoveMethodAnalyzer.analyze()) | |
4211 | && target(analyzer); | |
4212 | ||
4213 | after(SearchBasedExtractAndMoveMethodAnalyzer analyzer) : | |
4214 | methodAnalyze(analyzer) { | |
4215 | statistics.methodCount++; | |
4216 | debugPrintMethodAnalysisProgress(analyzer.method); | |
4217 | } | |
4218 | \end{minted} | |
4219 | \caption{An example of a pointcut named \method{methodAnalyze}, | |
4220 | and an advice defined to be applied after it has occurred.} | |
4221 | \label{lst:aspectjExample} | |
4222 | \end{listing} | |
4223 | ||
dd73ba39 EK |
4224 | \subsection{The Statistics class} |
4225 | The statistics aspect stores statistical information in an object of type | |
4226 | \type{Statistics}. As of now, the aspect needs to be initialized at the point in | |
4227 | time where it is desired that it starts its data gathering. At any point in time | |
4228 | the statistics aspect can be queried for a snapshot of the current statistics. | |
4229 | ||
4230 | The \type{Statistics} class also include functionality for generating a report | |
4231 | of its gathered statistics. The report can be given either as a string or it can | |
4232 | be written to a file. | |
4233 | ||
4234 | \subsection{Advices} | |
4235 | The statistics aspect contains advices for gathering statistical data from | |
4236 | different parts of the benchmarking process. It captures statistics from both | |
4237 | the analysis part and the execution part of the composite \ExtractAndMoveMethod | |
4238 | refactoring. | |
4239 | ||
4240 | For the analysis part, there are advices to count the number of text selections | |
4241 | analyzed and the number of methods, types, compilation units and packages | |
4242 | analyzed. There are also advices that counts for how many of the methods there | |
4243 | is found a selection that is a candidate for the refactoring, and for how many | |
3ab3e132 | 4244 | methods there is not. |
dd73ba39 EK |
4245 | |
4246 | There exists advices for counting both the successful and unsuccessful | |
4247 | executions of all the refactorings. Both for the \ExtractMethod and \MoveMethod | |
4248 | refactorings in isolation, as well as for the combination of them. | |
4249 | ||
8fe94c0b | 4250 | \section{Optimizations} |
41293210 | 4251 | When looking for optimizations to make for the benchmarking process, I used the |
60065669 EK |
4252 | \name{VisualVM}\footnote{\url{http://visualvm.java.net/}} \gloss{profiler} for |
4253 | the Java Virtual Machine to both profile the application and also to make memory | |
4254 | dumps of its heap. | |
8fe94c0b EK |
4255 | |
4256 | \subsection{Caching} | |
60065669 EK |
4257 | When \gloss{profiling} the benchmark process before making any optimizations, it |
4258 | early became apparent that the parsing of source code was a place to direct | |
4259 | attention towards. This discovery was done when only \emph{analyzing} source | |
4260 | code, before trying to do any \emph{manipulation} of it. Caching of the parsed | |
4261 | ASTs seemed like the best way to save some time, as expected. With only a simple | |
4262 | cache of the most recently used AST, the analysis time was speeded up by a | |
4263 | factor of around 20. This number depends a little upon which type of system the | |
4264 | analysis is run. | |
41293210 EK |
4265 | |
4266 | The caching is managed by a cache manager, that now, by default, utilizes the | |
4267 | not so well known feature of Java called a \emph{soft reference}. Soft | |
4268 | references are best explained in the context of weak references. A \emph{weak | |
4269 | reference} is a reference to an object instance that is only guaranteed to | |
4270 | persist as long as there is a \emph{strong reference} or a soft reference | |
4271 | referring the same object. If no such reference is found, its referred object is | |
4272 | garbage collected. A strong reference is basically the same as a regular Java | |
4273 | reference. A soft reference has the same guarantees as a week reference when it | |
4274 | comes to its relation to strong references, but it is not necessarily garbage | |
4275 | collected whenever there exists no strong references to it. A soft reference | |
4276 | \emph{may} reside in memory as long as the JVM has enough free memory in the | |
4277 | heap. A soft reference will therefore usually perform better than a weak | |
4278 | reference when used for simple caching and similar tasks. The way to use a | |
4279 | soft/weak reference is to as it for its referent. The return value then has to | |
4280 | be tested to check that it is not \var{null}. For the basic usage of soft | |
4281 | references, see \myref{lst:softReferenceExample}. For a more thorough | |
4282 | explanation of weak references in general, see\citing{weakRef2006}. | |
4283 | ||
4284 | \begin{listing}[h] | |
4285 | \begin{minted}{java} | |
4286 | // Strong reference | |
4287 | Object strongRef = new Object(); | |
4288 | ||
4289 | // Soft reference | |
4290 | SoftReference<Object> softRef = | |
4291 | new SoftReference<Object>(new Object()); | |
4292 | ||
4293 | // Using the soft reference | |
4294 | Object obj = softRef.get(); | |
4295 | if (obj != null) { | |
4296 | // Use object here | |
4297 | } | |
4298 | \end{minted} | |
4299 | \caption{Showing the basic usage of soft references. Weak references is used the | |
4300 | same way. {\footnotesize (The references are part of the \code{java.lang.ref} | |
4301 | package.)}} | |
4302 | \label{lst:softReferenceExample} | |
4303 | \end{listing} | |
4304 | ||
4305 | The cache based on soft references has no limit for how many ASTs it caches. It | |
4306 | is generally not advisable to keep references to ASTs for prolonged periods of | |
4307 | time, since they are expensive structures to hold on to. For regular plugin | |
fe0a4c48 | 4308 | development, \name{Eclipse} recommends not creating more than one AST at a time to |
41293210 EK |
4309 | limit memory consumption. Since the benchmarking has nothing to do with user |
4310 | experience, and throughput is everything, these advices are intentionally | |
fe0a4c48 | 4311 | ignored. This means that during the benchmarking process, the target \name{Eclipse} |
41293210 EK |
4312 | application may very well work close to its memory limit for the heap space for |
4313 | long periods during the benchmark. | |
8fe94c0b | 4314 | |
81cf9554 EK |
4315 | \subsection{Candidates stored as mementos} |
4316 | When performing large scale analysis of source code for finding candidates to | |
4317 | the \ExtractAndMoveMethod refactoring, memory is an issue. One of the inputs to | |
4318 | the refactoring is a variable binding. This variable binding indirectly retains | |
4319 | a whole AST. Since ASTs are large structures, this quickly leads to an | |
4320 | \type{OutOfMemoryError} if trying to analyze a large project without optimizing | |
4321 | how we store the candidates data. This means that the JVM cannot allocate more | |
4322 | memory for out benchmark, and it exists disgracefully. | |
4323 | ||
4324 | A possible solution could be to just allow the JVM to allocate even more memory, | |
4325 | but this is not a dependable solution. The allocated memory could easily | |
4326 | supersede the physical memory of a machine, and that would make the benchmark go | |
4327 | really slow. | |
4328 | ||
4329 | Thus, the candidates data must be stored in another format. Therefore, we use | |
4330 | the \gloss{mementoPattern} to store the variable binding information. This is | |
4331 | done in a way that makes it possible to retrieve the variable binding at a later | |
4332 | point. The data that is stored to achieve this, is the key to the original | |
4333 | variable binding. In addition to the key, we know which method and text | |
4334 | selection the variable is referenced in, so that we can find it by parsing the | |
4335 | source code and search for it when it is needed. | |
356782a0 | 4336 | |
98660ec0 EK |
4337 | \section{Handling failures} |
4338 | \todoin{write} | |
4339 | ||
a7514fbd EK |
4340 | |
4341 | \chapter{Technicalities} | |
4342 | ||
4343 | \section{Source code organization} | |
60065669 | 4344 | All the parts of this master's project is under version control with |
a7514fbd EK |
4345 | \name{Git}\footnote{\url{http://git-scm.com/}}. |
4346 | ||
4347 | The software written is organized as some \name{Eclipse} plugins. Writing a plugin is | |
4348 | the natural way to utilize the API of \name{Eclipse}. This also makes it possible to | |
4349 | provide a user interface to manually run operations on selections in program | |
4350 | source code or whole projects/packages. | |
4351 | ||
4352 | When writing a plugin in \name{Eclipse}, one has access to resources such as the | |
4353 | current workspace, the open editor and the current selection. | |
4354 | ||
4355 | The thesis work is contained in the following Eclipse projects: | |
4356 | ||
4357 | \begin{description} | |
4358 | \item[no.uio.ifi.refaktor] \hfill \\ This is the main Eclipse plugin | |
4359 | project, and contains all of the business logic for the plugin. | |
4360 | ||
4361 | \item[no.uio.ifi.refaktor.tests] \hfill \\ | |
4362 | This project contains the tests for the main plugin. | |
4363 | ||
4364 | \item[no.uio.ifi.refaktor.examples] \hfill \\ | |
4365 | Contains example code used in testing. It also contains code for managing | |
4366 | this example code, such as creating an Eclipse project from it before a test | |
4367 | run. | |
4368 | ||
4369 | \item[no.uio.ifi.refaktor.benchmark] \hfill \\ | |
4370 | This project contains code for running search based versions of the | |
4371 | composite refactoring over selected Eclipse projects. | |
4372 | ||
4373 | \item[no.uio.ifi.refaktor.releng] \hfill \\ | |
4374 | Contains the rmap, queries and target definitions needed by by Buckminster | |
4375 | on the Jenkins continuous integration server. | |
4376 | ||
4377 | \end{description} | |
4378 | ||
4379 | \subsection{The no.uio.ifi.refaktor project} | |
4380 | ||
4381 | \subsubsection{no.uio.ifi.refaktor.analyze} | |
e36eade0 EK |
4382 | This package, and its sub-packages, contains code that is used for analyzing |
4383 | Java source code. The most important sub-packages are presented below. | |
a7514fbd EK |
4384 | |
4385 | \begin{description} | |
4386 | \item[no.uio.ifi.refaktor.analyze.analyzers] \hfill \\ | |
4387 | This package contains source code analyzers. These are usually responsible | |
4388 | for analyzing text selections or running specialized analyzers for different | |
4389 | kinds of entities. Their structure are often hierarchical. This means that | |
4390 | you have an analyzer for text selections, that in turn is utilized by an | |
4391 | analyzer that analyzes all the selections of a method. Then there are | |
4392 | analyzers for analyzing all the methods of a type, all the types of a | |
4393 | compilation unit, all the compilation units of a package, and, at last, all | |
4394 | of the packages in a project. | |
4395 | ||
4396 | \item[no.uio.ifi.refaktor.analyze.checkers] \hfill \\ | |
4397 | A package containing checkers. The checkers are classes used to validate | |
4398 | that a selection can be further analyzed and chosen as a candidate for a | |
4399 | refactoring. Invalidating properties can be such as usage of inner classes | |
4400 | or the need for multiple return values. | |
4401 | ||
4402 | \item[no.uio.ifi.refaktor.analyze.collectors] \hfill \\ | |
4403 | This package contains the property collectors. Collectors are used to gather | |
4404 | properties from a text selection. This is mostly properties regarding | |
4405 | referenced names and their occurrences. It is these properties that makes up | |
4406 | the basis for finding the best candidates for a refactoring. | |
4407 | \end{description} | |
4408 | ||
4409 | \subsubsection{no.uio.ifi.refaktor.change} | |
e36eade0 | 4410 | This package, and its sub-packages, contains functionality for manipulate source |
a7514fbd EK |
4411 | code. |
4412 | ||
4413 | \begin{description} | |
4414 | \item[no.uio.ifi.refaktor.change.changers] \hfill \\ | |
4415 | This package contains source code changers. They are used to glue together | |
4416 | the analysis of source code and the actual execution of the changes. | |
4417 | ||
4418 | \item[no.uio.ifi.refaktor.change.executors] \hfill \\ | |
4419 | The executors that are responsible for making concrete changes are found in | |
4420 | this package. They are mostly used to create and execute one or more Eclipse | |
4421 | refactorings. | |
4422 | ||
4423 | \item[no.uio.ifi.refaktor.change.processors] \hfill \\ | |
4424 | Contains a refactoring processor for the \MoveMethod refactoring. The code | |
4425 | is stolen and modified to fix a bug. The related bug is described in | |
4426 | \myref{eclipse_bug_429416}. | |
4427 | ||
4428 | \end{description} | |
4429 | ||
4430 | \subsubsection{no.uio.ifi.refaktor.handlers} | |
4431 | This package contains handlers for the commands defined in the plugin manifest. | |
4432 | ||
4433 | \subsubsection{no.uio.ifi.refaktor.prefix} | |
4434 | This package contains the \type{Prefix} type that is the data representation of | |
4435 | the prefixes found by the \type{PrefixesCollector}. It also contains the prefix | |
4436 | set for storing and working with prefixes. | |
4437 | ||
4438 | \subsubsection{no.uio.ifi.refaktor.statistics} | |
4439 | The package contains statistics functionality. Its heart is the statistics | |
4440 | aspect that is responsible for gathering statistics during the execution of the | |
4441 | \ExtractAndMoveMethod refactoring. | |
4442 | ||
4443 | \begin{description} | |
4444 | \item[no.uio.ifi.refaktor.statistics.reports] \hfill \\ | |
4445 | This package contains a simple framework for generating reports from the | |
4446 | statistics data generated by the aspect. Currently, the only available | |
4447 | report type is a simple text report. | |
4448 | ||
4449 | \end{description} | |
4450 | ||
4451 | ||
4452 | \subsubsection{no.uio.ifi.refaktor.textselection} | |
4453 | This package contains the two custom text selections that are used extensively | |
4454 | throughout the project. One of them is just a subclass of the other, to support | |
4455 | the use of the memento pattern to optimize the memory usage during benchmarking. | |
4456 | ||
4457 | \subsubsection{no.uio.ifi.refaktor.debugging} | |
4458 | The package contains a debug utility class. I addition to this, the package | |
4459 | \code{no.uio.ifi.refaktor.utils.aspects} contains a couple of aspects used for | |
4460 | debugging purposes. | |
4461 | ||
4462 | \subsubsection{no.uio.ifi.refaktor.utils} | |
4463 | Utility package that contains all the functionality that has to do with parsing | |
4464 | of source code. It also has utility classes for looking up handles to methods | |
4465 | and types et cetera. | |
4466 | ||
4467 | \begin{description} | |
4468 | \item[no.uio.ifi.refaktor.utils.caching] \hfill \\ | |
4469 | This package contains the caching manager for compilation units, along with | |
4470 | classes for different caching strategies. | |
4471 | ||
4472 | \item[no.uio.ifi.refaktor.utils.nullobjects] \hfill \\ | |
4473 | Contains classes for creating different null objects. Most of the classes is | |
4474 | used to represent null objects of different handle types. These null objects | |
4475 | are returned from various utility classes instead of returning a \var{null} | |
4476 | value when other values are not available. | |
4477 | ||
4478 | \end{description} | |
4479 | ||
4480 | \section{Continuous integration} | |
4481 | The continuous integration server | |
4482 | \name{Jenkins}\footnote{\url{http://jenkins-ci.org/}} has been set up for the | |
4483 | project\footnote{A work mostly done by the supervisor.}. It is used as a way to | |
4484 | run tests and perform code coverage analysis. | |
4485 | ||
4486 | To be able to build the \name{Eclipse} plugins and run tests for them with Jenkins, the | |
4487 | component assembly project | |
4488 | \name{Buckminster}\footnote{\url{http://www.eclipse.org/buckminster/}} is used, | |
4489 | through its plugin for Jenkins. Buckminster provides for a way to specify the | |
4490 | resources needed for building a project and where and how to find them. | |
4491 | Buckminster also handles the setup of a target environment to run the tests in. | |
4492 | All this is needed because the code to build depends on an \name{Eclipse} | |
4493 | installation with various plugins. | |
4494 | ||
4495 | \subsection{Problems with AspectJ} | |
4496 | The Buckminster build worked fine until introducing AspectJ into the project. | |
4497 | When building projects using AspectJ, there are some additional steps that needs | |
4498 | to be performed. First of all, the aspects themselves must be compiled. Then the | |
4499 | aspects needs to be woven with the classes they affect. This demands a process | |
4500 | that does multiple passes over the source code. | |
4501 | ||
4502 | When using AspectJ with \name{Eclipse}, the specialized compilation and the | |
4503 | weaving can be handled by the \name{AspectJ Development | |
4504 | Tools}\footnote{\url{https://www.eclipse.org/ajdt/}}. This works all fine, but | |
4505 | it complicates things when trying to build a project depending on \name{Eclipse} | |
4506 | plugins outside of \name{Eclipse}. There is supposed to be a way to specify a | |
4507 | compiler adapter for javac, together with the file extensions for the file types | |
4508 | it shall operate. The AspectJ compiler adapter is called | |
4509 | \typewithref{org.aspectj.tools.ant.taskdefs}{Ajc11CompilerAdapter}, and it works | |
4510 | with files that has the extensions \code{*.java} and \code{*.aj}. I tried to | |
4511 | setup this in the build properties file for the project containing the aspects, | |
4512 | but to no avail. The project containing the aspects does not seem to be built at | |
4513 | all, and the projects that depends on it complains that they cannot find certain | |
4514 | classes. | |
4515 | ||
4516 | I then managed to write an \name{Ant}\footnote{\url{https://ant.apache.org/}} | |
4517 | build file that utilizes the AspectJ compiler adapter, for the | |
4518 | \code{no.uio.ifi.refaktor} plugin. The problem was then that it could no longer | |
4519 | take advantage of the environment set up by Buckminster. The solution to this | |
4520 | particular problem was of a ``hacky'' nature. It involves exporting the plugin | |
4521 | dependencies for the project to an Ant build file, and copy the exported path | |
4522 | into the existing build script. But then the Ant script needs to know where the | |
4523 | local \name{Eclipse} installation is located. This is no problem when building | |
4524 | on a local machine, but to utilize the setup done by Buckminster is a problem | |
4525 | still unsolved. To get the classpath for the build setup correctly, and here | |
4526 | comes the most ``hacky'' part of the solution, the Ant script has a target for | |
4527 | copying the classpath elements into a directory relative to the project | |
4528 | directory and checking it into Git. When no \code{ECLIPSE\_HOME} property is set | |
4529 | while running Ant, the script uses the copied plugins instead of the ones | |
4530 | provided by the \name{Eclipse} installation when building the project. This | |
4531 | obviously creates some problems with maintaining the list of dependencies in the | |
4532 | Ant file, as well as remembering to copy the plugins every time the list of | |
4533 | dependencies change. | |
4534 | ||
4535 | The Ant script described above is run by Jenkins before the Buckminster setup | |
4536 | and build. When setup like this, the Buckminster build succeeds for the projects | |
4537 | not using AspectJ, and the tests are run as normal. This is all good, but it | |
4538 | feels a little scary, since the reason for Buckminster not working with AspectJ | |
4539 | is still unknown. | |
4540 | ||
4541 | The problems with building with AspectJ on the Jenkins server lasted for a | |
4542 | while, before they were solved. This is reflected in the ``Test Result Trend'' | |
4543 | and ``Code Coverage Trend'' reported by Jenkins. | |
4544 | ||
4545 | ||
e1d6ae87 EK |
4546 | \chapter{Methodology} |
4547 | ||
4548 | \section{Evolutionary design} | |
4549 | In the programming work for this project, it have tried to use a design strategy | |
4928aa0b EK |
4550 | called evolutionary design, also known as continuous or incremental |
4551 | design\citing{wiki_continuous_2014}. It is a software design strategy | |
4552 | advocated by the Extreme Programming community. The essence of the strategy is | |
4553 | that you should let the design of your program evolve naturally as your | |
4554 | requirements change. This is seen in contrast with up-front design, where | |
4555 | design decisions are made early in the process. | |
e1d6ae87 EK |
4556 | |
4557 | The motivation behind evolutionary design is to keep the design of software as | |
4558 | simple as possible. This means not introducing unneeded functionality into a | |
4559 | program. You should defer introducing flexibility into your software, until it | |
4560 | is needed to be able to add functionality in a clean way. | |
4561 | ||
4562 | Holding up design decisions, implies that the time will eventually come when | |
4563 | decisions have to be made. The flexibility of the design then relies on the | |
4564 | programmer's abilities to perform the necessary refactoring, and \his confidence | |
4565 | in those abilities. From my experience working on this project, I can say that | |
4566 | this confidence is greatly enhanced by having automated tests to rely on | |
4567 | \see{tdd}. | |
4568 | ||
4569 | The choice of going for evolutionary design developed naturally. As Fowler | |
4570 | points out in his article \tit{Is Design Dead?}, evolutionary design much | |
4571 | resembles the ``code and fix'' development strategy\citing{fowler_design_2004}. | |
4572 | A strategy that most of us have practiced in school. This was also the case when | |
4573 | I first started this work. I had to learn the inner workings of Eclipse and its | |
4574 | refactoring-related plugins. That meant a lot of fumbling around with code I did | |
4575 | not know, in a trial and error fashion. Eventually I started writing tests for | |
4576 | my code, and my design began to evolve. | |
4577 | ||
4928aa0b EK |
4578 | \section{Test-driven development}\label{tdd} |
4579 | As mentioned before, the project started out as a classic code and fix | |
4580 | developmen process. My focus was aimed at getting something to work, rather than | |
4581 | doing so according to best practice. This resulted in a project that got out of | |
4582 | its starting blocks, but it was not accompanied by any tests. Hence it was soon | |
4583 | difficult to make any code changes with the confidence that the program was | |
4584 | still correct afterwards (assuming it was so before changing it). I always knew | |
4585 | that I had to introduce some tests at one point, but this experience accelerated | |
4586 | the process of leading me onto the path of testing. | |
4587 | ||
4588 | I then wrote tests for the core functionality of the plugin, and thus gained | |
4589 | more confidence in the correctness of my code. I could now perform quite drastic | |
4590 | changes without ``wetting my pants``. After this, nearly all of the semantic | |
4591 | changes done to the business logic of the project, or the addition of new | |
4592 | functionality, was made in a test-driven manner. This means that before | |
4593 | performing any changes, I would define the desired functionality through a set | |
4594 | of tests. I would then run the tests to check that they were run and that they | |
4595 | did not pass. Then I would do any code changes necessary to make the tests | |
4596 | pass. The definition of how the program is supposed to operate is then captured | |
4597 | by the tests. However, this does not prove the correctness of the analysis | |
4598 | leading to the test definitions. | |
4599 | ||
4600 | \section{Continuous integration} | |
4601 | \todoin{???} | |
4602 | ||
4603 | ||
0cbeeadb EK |
4604 | \chapter{Conclusions and Future Work} |
4605 | \todoin{Write} | |
4606 | ||
4607 | \section{Future work} | |
4608 | ||
4609 | \chapter{Related Work} | |
4610 | ||
4611 | \section{Safer refactorings} | |
4612 | \todoin{write} | |
4613 | ||
4614 | \section{The compositional paradigm of refactoring} | |
4615 | This paradigm builds upon the observation of Vakilian et | |
4616 | al.\citing{vakilian2012}, that of the many automated refactorings existing in | |
4617 | modern IDEs, the simplest ones are dominating the usage statistics. The report | |
4618 | mainly focuses on \name{Eclipse} as the tool under investigation. | |
4619 | ||
4620 | The paradigm is described almost as the opposite of automated composition of | |
4621 | refactorings \see{compositeRefactorings}. It works by providing the programmer | |
4622 | with easily accessible primitive refactorings. These refactorings shall be | |
4623 | accessed via keyboard shortcuts or quick-assist menus\footnote{Think | |
4624 | quick-assist with Ctrl+1 in \name{Eclipse}} and be promptly executed, opposed to in the | |
4625 | currently dominating wizard-based refactoring paradigm. They are meant to | |
4626 | stimulate composing smaller refactorings into more complex changes, rather than | |
4627 | doing a large upfront configuration of a wizard-based refactoring, before | |
4628 | previewing and executing it. The compositional paradigm of refactoring is | |
4629 | supposed to give control back to the programmer, by supporting \himher with an | |
4630 | option of performing small rapid changes instead of large changes with a lesser | |
4631 | degree of control. The report authors hope this will lead to fewer unsuccessful | |
4632 | refactorings. It also could lower the bar for understanding the steps of a | |
4633 | larger composite refactoring and thus also help in figuring out what goes wrong | |
4634 | if one should choose to op in on a wizard-based refactoring. | |
4635 | ||
4636 | Vakilian and his associates have performed a survey of the effectiveness of the | |
4637 | compositional paradigm versus the wizard-based one. They claim to have found | |
4638 | evidence of that the \emph{compositional paradigm} outperforms the | |
4639 | \emph{wizard-based}. It does so by reducing automation, which seem | |
4640 | counterintuitive. Therefore they ask the question ``What is an appropriate level | |
4641 | of automation?'', and thus questions what they feel is a rush toward more | |
4642 | automation in the software engineering community. | |
4643 | ||
4644 | ||
4645 | ||
4646 | \appendix | |
4647 | ||
4648 | ||
3727b75b | 4649 | \chapter{Eclipse Bugs Found} |
540ca7e4 EK |
4650 | \newcommand{\submittedBugReport}[1]{The submitted bug report can be found on |
4651 | \url{#1}.} | |
94bb49f0 EK |
4652 | |
4653 | \section{Eclipse bug 420726: Code is broken when moving a method that is | |
0f6e45f8 EK |
4654 | assigning to the parameter that is also the move |
4655 | destination}\label{eclipse_bug_420726} | |
540ca7e4 | 4656 | This bug |
94bb49f0 | 4657 | was found when analyzing what kinds of names that was to be considered as |
3727b75b | 4658 | \emph{unfixes} \see{unfixes}. |
94bb49f0 EK |
4659 | |
4660 | \subsection{The bug} | |
4661 | The bug emerges when trying to move a method from one class to another, and when | |
4662 | the target for the move (must be a variable, local or field) is both a parameter | |
fe0a4c48 | 4663 | variable and also is assigned to within the method body. \name{Eclipse} allows this to |
94bb49f0 EK |
4664 | happen, although it is the sure path to a compilation error. This is because we |
4665 | would then have an assignment to a \var{this} expression, which is not allowed | |
540ca7e4 EK |
4666 | in Java. |
4667 | \submittedBugReport{https://bugs.eclipse.org/bugs/show\_bug.cgi?id=420726} | |
94bb49f0 EK |
4668 | |
4669 | \subsection{The solution} | |
4670 | The solution to this problem is to add all simple names that are assigned to in | |
4671 | a method body to the set of unfixes. | |
128adb4f | 4672 | |
f1b6174d EK |
4673 | \section{Eclipse bug 429416: IAE when moving method from anonymous |
4674 | class}\label{eclipse_bug_429416} | |
540ca7e4 | 4675 | I discovered |
128adb4f EK |
4676 | this bug during a batch change on the \type{org.eclipse.jdt.ui} project. |
4677 | ||
4678 | \subsection{The bug} | |
fe0a4c48 | 4679 | This bug surfaces when trying to use the \refa{Move Method} refactoring to move a |
94bb49f0 | 4680 | method from an anonymous class to another class. This happens both for my |
fe0a4c48 EK |
4681 | simulation as well as in \name{Eclipse}, through the user interface. It only occurs |
4682 | when \name{Eclipse} analyzes the program and finds it necessary to pass an instance of | |
94bb49f0 | 4683 | the originating class as a parameter to the moved method. I.e. it want to pass a |
128adb4f EK |
4684 | \var{this} expression. The execution ends in an |
4685 | \typewithref{java.lang}{IllegalArgumentException} in | |
4686 | \typewithref{org.eclipse.jdt.core.dom}{SimpleName} and its | |
4687 | \method{setIdentifier(String)} method. The simple name is attempted created in | |
4688 | the method | |
4689 | \methodwithref{org.eclipse.jdt.internal.corext.refactoring.structure.\\MoveInstanceMethodProcessor}{createInlinedMethodInvocation} | |
4690 | so the \type{MoveInstanceMethodProcessor} was early a clear suspect. | |
4691 | ||
4692 | The \method{createInlinedMethodInvocation} is the method that creates a method | |
4693 | invocation where the previous invocation to the method that was moved was. From | |
4694 | its code it can be read that when a \var{this} expression is going to be passed | |
4695 | in to the invocation, it shall be qualified with the name of the original | |
3ab3e132 | 4696 | method's declaring class, if the declaring class is either an anonymous class or |
128adb4f EK |
4697 | a member class. The problem with this, is that an anonymous class does not have |
4698 | a name, hence the term \emph{anonymous} class! Therefore, when its name, an | |
4699 | empty string, is passed into | |
4700 | \methodwithref{org.eclipse.jdt.core.dom.AST}{newSimpleName} it all ends in an | |
540ca7e4 EK |
4701 | \type{IllegalArgumentException}. |
4702 | \submittedBugReport{https://bugs.eclipse.org/bugs/show\_bug.cgi?id=429416} | |
128adb4f EK |
4703 | |
4704 | \subsection{How I solved the problem} | |
4705 | Since the \type{MoveInstanceMethodProcessor} is instantiated in the | |
4706 | \typewithref{no.uio.ifi.refaktor.change.executors}{MoveMethod\-RefactoringExecutor}, | |
4707 | and only need to be a | |
4708 | \typewithref{org.eclipse.ltk.core.refactoring.participants}{MoveProcessor}, I | |
4709 | was able to copy the code for the original move processor and modify it so that | |
4710 | it works better for me. It is now called | |
f1b6174d | 4711 | \typewithref{no.uio.ifi.refaktor.change.processors}{ModifiedMoveInstanceMethodProcessor}. |
128adb4f EK |
4712 | The only modification done (in addition to some imports and suppression of |
4713 | warnings), is in the \method{createInlinedMethodInvocation}. When the declaring | |
4714 | class of the method to move is anonymous, the \var{this} expression in the | |
4715 | parameter list is not qualified with the declaring class' (empty) name. | |
4716 | ||
3727b75b EK |
4717 | \section{Eclipse bug 429954: Extracting statement with reference to local type |
4718 | breaks code}\label{eclipse_bug_429954} | |
540ca7e4 | 4719 | The bug |
a6415293 | 4720 | was discovered when doing some changes to the way unfixes is computed. |
3727b75b EK |
4721 | |
4722 | \subsection{The bug} | |
fe0a4c48 | 4723 | The problem is that \name{Eclipse} is allowing selections that references variables of |
3727b75b EK |
4724 | local types to be extracted. When this happens the code is broken, since the |
4725 | extracted method must take a parameter of a local type that is not in the | |
4726 | methods scope. The problem is illustrated in | |
540ca7e4 EK |
4727 | \myref{lst:extractMethod_LocalClass}, but there in another setting. |
4728 | \submittedBugReport{https://bugs.eclipse.org/bugs/show\_bug.cgi?id=429954} | |
3727b75b EK |
4729 | |
4730 | \subsection{Actions taken} | |
4731 | There are no actions directly springing out of this bug, since the Extract | |
a6415293 | 4732 | Method refactoring cannot be meant to be this way. This is handled on the |
fe0a4c48 | 4733 | analysis stage of our \refa{Extract and Move Method} refactoring. So names representing |
3727b75b EK |
4734 | variables of local types is considered unfixes \see{unfixes}. |
4735 | \todoin{write more when fixing this in legal statements checker} | |
9ff90080 | 4736 | \backmatter{} |
fe0a4c48 | 4737 | \printglossaries |
9ff90080 | 4738 | \printbibliography |
055dca93 | 4739 | \listoftodos |
9ff90080 | 4740 | \end{document} |