]>
Commit | Line | Data |
---|---|---|
c8088eec | 1 | \documentclass[USenglish,11pt]{ifimaster} |
571ef294 | 2 | \usepackage{import} |
7c28933b | 3 | \usepackage[utf8]{inputenc} |
9ff90080 | 4 | \usepackage[T1]{fontenc,url} |
3510e539 | 5 | \usepackage{lmodern} % using Latin Modern to be able to use bold typewriter font |
b4e539f7 | 6 | %\usepackage{mathpazo} |
9ff90080 | 7 | \urlstyle{sf} |
128adb4f | 8 | \usepackage{listings} |
4e468834 | 9 | \usepackage{tabularx} |
d11bcf4d EK |
10 | \usepackage{tikz} |
11 | \usepackage{tikz-qtree} | |
5e5908eb | 12 | \usetikzlibrary{shapes,snakes,trees,arrows,shadows,positioning,calc} |
ae138b9d EK |
13 | \usepackage{babel,textcomp,csquotes,ifimasterforside} |
14 | ||
15 | \usepackage{varioref} | |
79d5c2a9 | 16 | \usepackage[hidelinks]{hyperref} |
8b6b22c8 | 17 | \usepackage{cleveref} |
fe0a4c48 | 18 | \usepackage[xindy,entrycounter]{glossaries} |
ae138b9d | 19 | |
fe0a4c48 | 20 | \usepackage[style=alphabetic,backend=biber]{biblatex} |
12c254af | 21 | \usepackage{amsthm} |
3471cd15 | 22 | \usepackage{mathtools} |
0ab2e134 EK |
23 | \usepackage{graphicx} |
24 | % use 'disable' before printing: | |
25 | \usepackage[]{todonotes} | |
0d7fbd88 EK |
26 | \usepackage{xspace} |
27 | \usepackage{he-she} | |
b289552b | 28 | \usepackage{verbatim} |
ddcea0b5 | 29 | \usepackage{minted} |
347ed677 | 30 | \usepackage{multicol} |
ddcea0b5 | 31 | \usemintedstyle{bw} |
8fae7b44 EK |
32 | \usepackage{perpage} %the perpage package |
33 | \MakePerPage{footnote} %the perpage package command | |
9ff90080 | 34 | |
6c51af15 | 35 | \theoremstyle{definition} |
12c254af | 36 | \newtheorem*{wordDef}{Definition} |
3471cd15 | 37 | \newtheorem*{theorem}{Theorem} |
12c254af | 38 | |
ddcea0b5 EK |
39 | \graphicspath{ {./figures/} } |
40 | ||
8b6b22c8 | 41 | \newcommand{\citing}[1]{~\cite{#1}} |
e123ab03 EK |
42 | %\newcommand{\myref}[1]{\cref{#1} on \cpageref{#1}} |
43 | \newcommand{\myref}[1]{\vref{#1}} | |
8b6b22c8 | 44 | |
fe0a4c48 | 45 | \newcommand{\glossref}[1]{\textsuperscript{(\glsrefentry{#1})}} |
20bcc7bf EK |
46 | %\newcommand{\gloss}[1]{\gls{#1}\glossref{#1}} |
47 | %\newcommand{\glosspl}[1]{\glspl{#1}\glossref{#1}} | |
48 | \newcommand{\gloss}[1]{\gls{#1}} | |
49 | \newcommand{\glosspl}[1]{\glspl{#1}} | |
fe0a4c48 | 50 | |
12c254af | 51 | \newcommand{\definition}[1]{\begin{wordDef}#1\end{wordDef}} |
8b6b22c8 | 52 | \newcommand{\see}[1]{(see \myref{#1})} |
137e0e7b EK |
53 | \newcommand{\explanation}[3]{\noindent\textbf{\textit{#1}}\\*\emph{When:} |
54 | #2\\*\emph{How:} #3\\*[-7px]} | |
4e135659 | 55 | |
128adb4f | 56 | %\newcommand{\type}[1]{\lstinline{#1}} |
03674629 EK |
57 | \newcommand{\code}[1]{\texttt{\textbf{#1}}} |
58 | \newcommand{\type}[1]{\code{#1}} | |
f041551b EK |
59 | \newcommand{\typeref}[1]{\footnote{\type{#1}}} |
60 | \newcommand{\typewithref}[2]{\type{#2}\typeref{#1.#2}} | |
61 | \newcommand{\method}[1]{\type{#1}} | |
62 | \newcommand{\methodref}[2]{\footnote{\type{#1}\method{\##2()}}} | |
63 | \newcommand{\methodwithref}[2]{\method{#2}\footnote{\type{#1}\method{\##2()}}} | |
3510e539 | 64 | \newcommand{\var}[1]{\type{#1}} |
9ff90080 | 65 | |
fe0a4c48 EK |
66 | \newcommand{\name}[1]{#1} |
67 | \newcommand{\tit}[1]{\emph{#1}} | |
68 | \newcommand{\refa}[1]{\emph{#1}} | |
69 | \newcommand{\pattern}[1]{\emph{#1}} | |
70 | \newcommand{\metr}[1]{\emph{#1}} | |
71 | \newcommand{\ExtractMethod}{\refa{Extract Method}\xspace} | |
72 | \newcommand{\MoveMethod}{\refa{Move Method}\xspace} | |
73 | \newcommand{\ExtractAndMoveMethod}{\refa{Extract and Move Method}\xspace} | |
b5c7bb1b | 74 | |
7125ceac | 75 | \newcommand\todoin[2][]{\todo[inline, caption={#2}, #1]{ |
4e135659 EK |
76 | \begin{minipage}{\textwidth-4pt}#2\end{minipage}}} |
77 | ||
cbc3b044 EK |
78 | \title{Automated Composition of Refactorings} |
79 | \subtitle{Composing the Extract and Move Method refactorings in Eclipse} | |
7c28933b EK |
80 | \author{Erlend Kristiansen} |
81 | ||
fe0a4c48 EK |
82 | \makeglossaries |
83 | \newglossaryentry{profiling} | |
84 | { | |
85 | name=profiling, | |
86 | description={is to run a computer program through a profiler/with a profiler | |
87 | attached} | |
88 | } | |
20bcc7bf EK |
89 | \newglossaryentry{profiler} |
90 | { | |
91 | name=profiler, | |
92 | description={A profiler is a program for analyzing performance within an | |
93 | application. It is used to analyze memory consumption, processing time and | |
94 | frequency of procedure calls and such.} | |
95 | } | |
96 | \newglossaryentry{xUnit} | |
97 | { | |
98 | name={xUnit framework}, | |
99 | description={An xUnit framework is a framework for writing unit tests for a | |
100 | computer program. It follows the patterns known from the JUnit framework for | |
101 | Java\citing{fowlerXunit} | |
102 | }, | |
103 | plural={xUnit frameworks} | |
104 | } | |
fe0a4c48 EK |
105 | \newglossaryentry{softwareObfuscation} |
106 | { | |
107 | name={software obfuscation}, | |
108 | description={makes source code harder to read and analyze, while preserving | |
109 | its semantics} | |
110 | } | |
111 | \newglossaryentry{extractClass} | |
112 | { | |
113 | name=\refa{Extract Class}, | |
114 | description={The \refa{Extract Class} refactoring works by creating a class, | |
115 | for then to move members from another class to that class and access them from | |
116 | the old class via a reference to the new class} | |
117 | } | |
118 | \newglossaryentry{designPattern} | |
119 | { | |
120 | name={design pattern}, | |
121 | description={A design pattern is a named abstraction, that is meant to solve a | |
122 | general design problem. It describes the key aspects of a common problem and | |
123 | identifies its participators and how they collaborate}, | |
124 | plural={design patterns} | |
125 | } | |
126 | \newglossaryentry{extractMethod} | |
127 | { | |
128 | name=\refa{Extract Method}, | |
129 | description={The \refa{Extract Method} refactoring is used to extract a | |
130 | fragment of code from its context and into a new method. A call to the new | |
131 | method is inlined where the fragment was before. It is used to break code into | |
132 | logical units, with names that explain their purpose} | |
133 | } | |
134 | \newglossaryentry{moveMethod} | |
135 | { | |
136 | name=\refa{Move Method}, | |
137 | description={The \refa{Move Method} refactoring is used to move a method from | |
138 | one class to another. This is useful if the method is using more features of | |
139 | another class than of the class which it is currently defined. Then all calls | |
140 | to this method must be updated, or the method must be copied, with the old | |
141 | method delegating to the new method} | |
142 | } | |
f5fb40e4 | 143 | |
7c28933b | 144 | \bibliography{bibliography/master-thesis-erlenkr-bibliography} |
9ff90080 | 145 | |
c8088eec EK |
146 | % UML comment in TikZ: |
147 | % ref: https://tex.stackexchange.com/questions/103688/folded-paper-shape-tikz | |
43ca7be4 EK |
148 | \makeatletter |
149 | \pgfdeclareshape{umlcomment}{ | |
150 | \inheritsavedanchors[from=rectangle] % this is nearly a rectangle | |
151 | \inheritanchorborder[from=rectangle] | |
152 | \inheritanchor[from=rectangle]{center} | |
153 | \inheritanchor[from=rectangle]{north} | |
154 | \inheritanchor[from=rectangle]{south} | |
155 | \inheritanchor[from=rectangle]{west} | |
156 | \inheritanchor[from=rectangle]{east} | |
157 | % ... and possibly more | |
158 | \backgroundpath{% this is new | |
159 | % store lower right in xa/ya and upper right in xb/yb | |
160 | \southwest \pgf@xa=\pgf@x \pgf@ya=\pgf@y | |
161 | \northeast \pgf@xb=\pgf@x \pgf@yb=\pgf@y | |
162 | % compute corner of ‘‘flipped page’’ | |
163 | \pgf@xc=\pgf@xb \advance\pgf@xc by-10pt % this should be a parameter | |
164 | \pgf@yc=\pgf@yb \advance\pgf@yc by-10pt | |
165 | % construct main path | |
166 | \pgfpathmoveto{\pgfpoint{\pgf@xa}{\pgf@ya}} | |
167 | \pgfpathlineto{\pgfpoint{\pgf@xa}{\pgf@yb}} | |
168 | \pgfpathlineto{\pgfpoint{\pgf@xc}{\pgf@yb}} | |
169 | \pgfpathlineto{\pgfpoint{\pgf@xb}{\pgf@yc}} | |
170 | \pgfpathlineto{\pgfpoint{\pgf@xb}{\pgf@ya}} | |
171 | \pgfpathclose | |
172 | % add little corner | |
173 | \pgfpathmoveto{\pgfpoint{\pgf@xc}{\pgf@yb}} | |
174 | \pgfpathlineto{\pgfpoint{\pgf@xc}{\pgf@yc}} | |
175 | \pgfpathlineto{\pgfpoint{\pgf@xb}{\pgf@yc}} | |
176 | \pgfpathlineto{\pgfpoint{\pgf@xc}{\pgf@yc}} | |
177 | } | |
178 | } | |
179 | \makeatother | |
180 | ||
181 | \tikzstyle{comment}=[% | |
182 | draw, | |
183 | drop shadow, | |
184 | fill=white, | |
185 | align=center, | |
186 | shape=document, | |
187 | minimum width=20mm, | |
188 | minimum height=10mm, | |
189 | shape=umlcomment, | |
190 | inner sep=2ex, | |
191 | font=\ttfamily, | |
192 | ] | |
193 | ||
f5fb40e4 EK |
194 | %\interfootnotelinepenalty=10000 |
195 | ||
9ff90080 | 196 | \begin{document} |
fe0a4c48 | 197 | \pagenumbering{roman} |
531c4132 | 198 | \ififorside |
9ff90080 | 199 | \frontmatter{} |
9ff90080 EK |
200 | |
201 | ||
202 | \chapter*{Abstract} | |
064227e3 EK |
203 | \todoin{\textbf{Remove all todos (including list) before delivery/printing!!! |
204 | Can be done by removing ``draft'' from documentclass.}} | |
889ba93e | 205 | \todoin{Write abstract} |
9ff90080 EK |
206 | |
207 | \tableofcontents{} | |
208 | \listoffigures{} | |
209 | \listoftables{} | |
210 | ||
211 | \chapter*{Preface} | |
212 | ||
d1adbeef EK |
213 | The discussions in this report must be seen in the context of object oriented |
214 | programming languages, and Java in particular, since that is the language in | |
215 | which most of the examples will be given. All though the techniques discussed | |
216 | may be applicable to languages from other paradigms, they will not be the | |
217 | subject of this report. | |
f3a108c3 | 218 | |
055dca93 | 219 | \mainmatter |
00aa0588 | 220 | |
740e1b6c | 221 | \chapter{What is Refactoring?} |
7c28933b | 222 | |
f3a108c3 EK |
223 | This question is best answered by first defining the concept of a |
224 | \emph{refactoring}, what it is to \emph{refactor}, and then discuss what aspects | |
a1bafe90 | 225 | of programming make people want to refactor their code. |
00aa0588 | 226 | |
740e1b6c | 227 | \section{Defining refactoring} |
a1bafe90 | 228 | Martin Fowler, in his classic book on refactoring\citing{refactoring}, defines a |
00aa0588 | 229 | refactoring like this: |
ee45c41f | 230 | |
00aa0588 | 231 | \begin{quote} |
aa1e3779 EK |
232 | \emph{Refactoring} (noun): a change made to the internal |
233 | structure\footnote{The structure observable by the programmer.} of software to | |
234 | make it easier to understand and cheaper to modify without changing its | |
235 | observable behavior.~\cite[p.~53]{refactoring} | |
00aa0588 | 236 | \end{quote} |
ee45c41f | 237 | |
a1bafe90 | 238 | \noindent This definition assigns additional meaning to the word |
ee45c41f EK |
239 | \emph{refactoring}, beyond the composition of the prefix \emph{re-}, usually |
240 | meaning something like ``again'' or ``anew'', and the word \emph{factoring}, | |
6c51af15 EK |
241 | that can mean to isolate the \emph{factors} of something. Here a \emph{factor} |
242 | would be close to the mathematical definition of something that divides a | |
243 | quantity, without leaving a remainder. Fowler is mixing the \emph{motivation} | |
a1bafe90 EK |
244 | behind refactoring into his definition. Instead it could be more refined, formed |
245 | to only consider the \emph{mechanical} and \emph{behavioral} aspects of | |
246 | refactoring. That is to factor the program again, putting it together in a | |
247 | different way than before, while preserving the behavior of the program. An | |
248 | alternative definition could then be: | |
6c51af15 EK |
249 | |
250 | \definition{A \emph{refactoring} is a transformation | |
8fae7b44 | 251 | done to a program without altering its external behavior.} |
00aa0588 | 252 | |
ee1d883a EK |
253 | From this we can conclude that a refactoring primarily changes how the |
254 | \emph{code} of a program is perceived by the \emph{programmer}, and not the | |
740e1b6c EK |
255 | \emph{behavior} experienced by any user of the program. Although the logical |
256 | meaning is preserved, such changes could potentially alter the program's | |
257 | behavior when it comes to performance gain or -penalties. So any logic depending | |
258 | on the performance of a program could make the program behave differently after | |
259 | a refactoring. | |
00aa0588 | 260 | |
fe0a4c48 EK |
261 | In the extreme case one could argue that \gloss{softwareObfuscation} is |
262 | refactoring. It is often used to protect proprietary software. It restrains | |
263 | uninvited viewers, so they have a hard time analyzing code that they are not | |
264 | supposed to know how works. This could be a problem when using a language that | |
265 | is possible to decompile, such as Java. | |
b4e539f7 EK |
266 | |
267 | Obfuscation could be done composing many, more or less randomly chosen, | |
268 | refactorings. Then the question arises whether it can be called a | |
269 | \emph{composite refactoring} or not \see{compositeRefactorings}? The answer is | |
270 | not obvious. First, there is no way to describe the mechanics of software | |
271 | obfuscation, because there are infinitely many ways to do that. Second, | |
272 | obfuscation can be thought of as \emph{one operation}: Either the code is | |
273 | obfuscated, or it is not. Third, it makes no sense to call software obfuscation | |
274 | \emph{a refactoring}, since it holds different meaning to different people. | |
275 | ||
276 | This last point is important, since one of the motivations behind defining | |
277 | different refactorings, is to establish a \emph{vocabulary} for software | |
278 | professionals to use when reasoning about and discussing programs, similar to | |
fe0a4c48 | 279 | the motivation behind \glosspl{designPattern}\citing{designPatterns}. |
b4e539f7 EK |
280 | \begin{comment} |
281 | So for describing \emph{software obfuscation}, it might be more appropriate to | |
282 | define what you do when performing it rather than precisely defining its | |
283 | mechanics in terms of other refactorings. | |
284 | \end{comment} | |
00aa0588 | 285 | |
740e1b6c | 286 | \section{The etymology of 'refactoring'} |
f3a108c3 EK |
287 | It is a little difficult to pinpoint the exact origin of the word |
288 | ``refactoring'', as it seems to have evolved as part of a colloquial | |
289 | terminology, more than a scientific term. There is no authoritative source for a | |
290 | formal definition of it. | |
291 | ||
b5c7bb1b | 292 | According to Martin Fowler\citing{etymology-refactoring}, there may also be more |
f3a108c3 | 293 | than one origin of the word. The most well-known source, when it comes to the |
b4e539f7 EK |
294 | origin of \emph{refactoring}, is the |
295 | Smalltalk\footnote{\label{footNote}Programming language} community and their | |
fe0a4c48 | 296 | infamous \name{Refactoring |
f3a108c3 | 297 | Browser}\footnote{\url{http://st-www.cs.illinois.edu/users/brant/Refactory/RefactoringBrowser.html}} |
fe0a4c48 | 298 | described in the article \tit{A Refactoring Tool for |
b5c7bb1b EK |
299 | Smalltalk}\citing{refactoringBrowser1997}, published in 1997. |
300 | Allegedly\citing{etymology-refactoring}, the metaphor of factoring programs was | |
b4e539f7 | 301 | also present in the Forth\textsuperscript{\ref{footNote}} community, and the |
fe0a4c48 EK |
302 | word ``refactoring'' is mentioned in a book by Leo Brodie, called \tit{Thinking |
303 | Forth}\citing{brodie2004}, first published in 1984\footnote{\tit{Thinking Forth} | |
304 | was first published in 1984 by the \name{Forth Interest Group}. Then it was | |
305 | reprinted in 1994 with minor typographical corrections, before it was | |
b4e539f7 EK |
306 | transcribed into an electronic edition typeset in \LaTeX\ and published under a |
307 | Creative Commons licence in | |
308 | 2004. The edition cited here is the 2004 edition, but the content should | |
309 | essentially be as in 1984.}. The exact word is only printed one | |
310 | place~\cite[p.~232]{brodie2004}, but the term \emph{factoring} is prominent in | |
311 | the book, that also contains a whole chapter dedicated to (re)factoring, and how | |
312 | to keep the (Forth) code clean and maintainable. | |
ee45c41f | 313 | |
f3a108c3 EK |
314 | \begin{quote} |
315 | \ldots good factoring technique is perhaps the most important skill for a | |
3a154bb7 | 316 | Forth programmer.~\cite[p.~172]{brodie2004} |
f3a108c3 | 317 | \end{quote} |
ee45c41f EK |
318 | |
319 | \noindent Brodie also express what \emph{factoring} means to him: | |
320 | ||
f3a108c3 EK |
321 | \begin{quote} |
322 | Factoring means organizing code into useful fragments. To make a fragment | |
323 | useful, you often must separate reusable parts from non-reusable parts. The | |
324 | reusable parts become new definitions. The non-reusable parts become arguments | |
3a154bb7 | 325 | or parameters to the definitions.~\cite[p.~172]{brodie2004} |
f3a108c3 EK |
326 | \end{quote} |
327 | ||
328 | Fowler claims that the usage of the word \emph{refactoring} did not pass between | |
fe0a4c48 | 329 | the \name{Forth} and \name{Smalltalk} communities, but that it emerged |
f3a108c3 EK |
330 | independently in each of the communities. |
331 | ||
740e1b6c | 332 | \section{Motivation -- Why people refactor} |
2f6e6dec EK |
333 | There are many reasons why people want to refactor their programs. They can for |
334 | instance do it to remove duplication, break up long methods or to introduce | |
b4e539f7 EK |
335 | design patterns into their software systems. The shared trait for all these are |
336 | that peoples' intentions are to make their programs \emph{better}, in some | |
337 | sense. But what aspects of their programs are becoming improved? | |
338 | ||
339 | As just mentioned, people often refactor to get rid of duplication. They are | |
340 | moving identical or similar code into methods, and are pushing methods up or | |
341 | down in their class hierarchies. They are making template methods for | |
342 | overlapping algorithms/functionality, and so on. It is all about gathering what | |
343 | belongs together and putting it all in one place. The resulting code is then | |
344 | easier to maintain. When removing the implicit coupling\footnote{When | |
345 | duplicating code, the duplicate pieces of code might not be coupled, apart | |
346 | from representing the same functionality. So if this functionality is going to | |
347 | change, it might need to change in more than one place, thus creating an | |
348 | implicit coupling between multiple pieces of code.} between code snippets, the | |
137e0e7b | 349 | location of a bug is limited to only one place, and new functionality need only |
a1bafe90 EK |
350 | to be added to this one place, instead of a number of places people might not |
351 | even remember. | |
352 | ||
353 | A problem you often encounter when programming, is that a program contains a lot | |
354 | of long and hard-to-grasp methods. It can then help to break the methods into | |
fe0a4c48 EK |
355 | smaller ones, using the \gloss{extractMethod} refactoring\citing{refactoring}. |
356 | Then you may discover something about a program that you were not aware of | |
357 | before; revealing bugs you did not know about or could not find due to the | |
358 | complex structure of your program. \todo{Proof?} Making the methods smaller and | |
359 | giving good names to the new ones clarifies the algorithms and enhances the | |
a1bafe90 EK |
360 | \emph{understandability} of the program \see{magic_number_seven}. This makes |
361 | refactoring an excellent method for exploring unknown program code, or code that | |
362 | you had forgotten that you wrote. | |
363 | ||
b4e539f7 EK |
364 | Most primitive refactorings are simple, and usually involves moving code |
365 | around\citing{kerievsky2005}. The motivation behind them may first be revealed | |
366 | when they are combined into larger --- higher level --- refactorings, called | |
a1bafe90 | 367 | \emph{composite refactorings} \see{compositeRefactorings}. Often the goal of |
b4e539f7 EK |
368 | such a series of refactorings is a design pattern. Thus the design can |
369 | \emph{evolve} throughout the lifetime of a program, as opposed to designing | |
370 | up-front. It is all about being structured and taking small steps to improve a | |
371 | program's design. | |
a1bafe90 EK |
372 | |
373 | Many software design pattern are aimed at lowering the coupling between | |
374 | different classes and different layers of logic. One of the most famous is | |
fe0a4c48 EK |
375 | perhaps the \pattern{Model-View-Controller}\citing{designPatterns} pattern. It |
376 | is aimed at lowering the coupling between the user interface, the business logic | |
b4e539f7 EK |
377 | and the data representation of a program. This also has the added benefit that |
378 | the business logic could much easier be the target of automated tests, thus | |
379 | increasing the productivity in the software development process. | |
0b0567f2 EK |
380 | |
381 | Another effect of refactoring is that with the increased separation of concerns | |
a1bafe90 EK |
382 | coming out of many refactorings, the \emph{performance} can be improved. When |
383 | profiling programs, the problematic parts are narrowed down to smaller parts of | |
384 | the code, which are easier to tune, and optimization can be performed only where | |
b4e539f7 | 385 | needed and in a more effective way\citing{refactoring}. |
137e0e7b | 386 | |
b01d328a EK |
387 | Last, but not least, and this should probably be the best reason to refactor, is |
388 | to refactor to \emph{facilitate a program change}. If one has managed to keep | |
389 | one's code clean and tidy, and the code is not bloated with design patterns that | |
a1bafe90 | 390 | are not ever going to be needed, then some refactoring might be needed to |
b01d328a EK |
391 | introduce a design pattern that is appropriate for the change that is going to |
392 | happen. | |
393 | ||
137e0e7b EK |
394 | Refactoring program code --- with a goal in mind --- can give the code itself |
395 | more value. That is in the form of robustness to bugs, understandability and | |
a1bafe90 EK |
396 | maintainability. Having robust code is an obvious advantage, but |
397 | understandability and maintainability are both very important aspects of | |
398 | software development. By incorporating refactoring in the development process, | |
399 | bugs are found faster, new functionality is added more easily and code is easier | |
400 | to understand by the next person exposed to it, which might as well be the | |
401 | person who wrote it. The consequence of this, is that refactoring can increase | |
402 | the average productivity of the development process, and thus also add to the | |
403 | monetary value of a business in the long run. The perspective on productivity | |
404 | and money should also be able to open the eyes of the many nearsighted managers | |
405 | that seldom see beyond the next milestone. | |
137e0e7b | 406 | |
b01d328a | 407 | \section{The magical number seven}\label{magic_number_seven} |
fe0a4c48 EK |
408 | The article \tit{The magical number seven, plus or minus two: some limits on our |
409 | capacity for processing information}\citing{miller1956} by George A. Miller, | |
410 | was published in the journal \name{Psychological Review} in 1956. It presents | |
411 | evidence that support that the capacity of the number of objects a human being | |
412 | can hold in its working memory is roughly seven, plus or minus two objects. This | |
413 | number varies a bit depending on the nature and complexity of the objects, but | |
414 | is according to Miller ``\ldots never changing so much as to be | |
f4cea2d6 EK |
415 | unrecognizable.'' |
416 | ||
417 | Miller's article culminates in the section called \emph{Recoding}, a term he | |
418 | borrows from communication theory. The central result in this section is that by | |
419 | recoding information, the capacity of the amount of information that a human can | |
420 | process at a time is increased. By \emph{recoding}, Miller means to group | |
b4e539f7 EK |
421 | objects together in chunks, and give each chunk a new name that it can be |
422 | remembered by. | |
f4cea2d6 EK |
423 | |
424 | \begin{quote} | |
425 | \ldots recoding is an extremely powerful weapon for increasing the amount of | |
4cb06723 | 426 | information that we can deal with.~\cite[p.~95]{miller1956} |
f4cea2d6 | 427 | \end{quote} |
ee45c41f | 428 | |
b4e539f7 EK |
429 | By organizing objects into patterns of ever growing depth, one can memorize and |
430 | process a much larger amount of data than if it were to be represented as its | |
431 | basic pieces. This grouping and renaming is analogous to how many refactorings | |
432 | work, by grouping pieces of code and give them a new name. Examples are the | |
fe0a4c48 | 433 | fundamental \ExtractMethod and \refa{Extract Class} |
b4e539f7 EK |
434 | refactorings\citing{refactoring}. |
435 | ||
a1bafe90 | 436 | An example from the article addresses the problem of memorizing a sequence of |
b4e539f7 EK |
437 | binary digits. The example presented here is a slightly modified version of the |
438 | one presented in the original article\citing{miller1956}, but it preserves the | |
3ab3e132 | 439 | essence of it. Let us say we have the following sequence of |
f4cea2d6 EK |
440 | 16 binary digits: ``1010001001110011''. Most of us will have a hard time |
441 | memorizing this sequence by only reading it once or twice. Imagine if we instead | |
442 | translate it to this sequence: ``A273''. If you have a background from computer | |
b4e539f7 EK |
443 | science, it will be obvious that the latter sequence is the first sequence |
444 | recoded to be represented by digits in base 16. Most people should be able to | |
f4cea2d6 EK |
445 | memorize this last sequence by only looking at it once. |
446 | ||
447 | Another result from the Miller article is that when the amount of information a | |
448 | human must interpret increases, it is crucial that the translation from one code | |
449 | to another must be almost automatic for the subject to be able to remember the | |
0d7fbd88 | 450 | translation, before \heshe is presented with new information to recode. Thus |
f4cea2d6 EK |
451 | learning and understanding how to best organize certain kinds of data is |
452 | essential to efficiently handle that kind of data in the future. This is much | |
a1bafe90 EK |
453 | like when humans learn to read. First they must learn how to recognize letters. |
454 | Then they can learn distinct words, and later read sequences of words that form | |
455 | whole sentences. Eventually, most of them will be able to read whole books and | |
456 | briefly retell the important parts of its content. This suggest that the use of | |
b4e539f7 EK |
457 | design patterns is a good idea when reasoning about computer programs. With |
458 | extensive use of design patterns when creating complex program structures, one | |
459 | does not always have to read whole classes of code to comprehend how they | |
460 | function, it may be sufficient to only see the name of a class to almost fully | |
461 | understand its responsibilities. | |
f4cea2d6 EK |
462 | |
463 | \begin{quote} | |
464 | Our language is tremendously useful for repackaging material into a few chunks | |
4cb06723 | 465 | rich in information.~\cite[p.~95]{miller1956} |
f4cea2d6 | 466 | \end{quote} |
ee45c41f | 467 | |
a1bafe90 | 468 | Without further evidence, these results at least indicate that refactoring |
f4cea2d6 EK |
469 | source code into smaller units with higher cohesion and, when needed, |
470 | introducing appropriate design patterns, should aid in the cause of creating | |
b4e539f7 | 471 | computer programs that are easier to maintain and have code that is easier (and |
f4cea2d6 EK |
472 | better) understood. |
473 | ||
740e1b6c | 474 | \section{Notable contributions to the refactoring literature} |
4e135659 | 475 | \todoin{Update with more contributions} |
36d99783 | 476 | |
d21ef41f EK |
477 | \begin{description} |
478 | \item[1992] William F. Opdyke submits his doctoral dissertation called | |
fe0a4c48 EK |
479 | \tit{Refactoring Object-Oriented Frameworks}\citing{opdyke1992}. This work |
480 | defines a set of refactorings, that are behavior preserving given that their | |
481 | preconditions are met. The dissertation is focused on the automation of | |
482 | refactorings. | |
483 | \item[1999] Martin Fowler et al.: \tit{Refactoring: Improving the Design of | |
b5c7bb1b EK |
484 | Existing Code}\citing{refactoring}. This is maybe the most influential text |
485 | on refactoring. It bares similarities with Opdykes thesis\citing{opdyke1992} | |
d21ef41f EK |
486 | in the way that it provides a catalog of refactorings. But Fowler's book is |
487 | more about the craft of refactoring, as he focuses on establishing a | |
488 | vocabulary for refactoring, together with the mechanics of different | |
489 | refactorings and when to perform them. His methodology is also founded on | |
fe0a4c48 EK |
490 | the principles of test-driven development. |
491 | \item[2005] Joshua Kerievsky: \tit{Refactoring to | |
36d99783 | 492 | Patterns}\citing{kerievsky2005}. This book is heavily influenced by Fowler's |
fe0a4c48 | 493 | \tit{Refactoring}\citing{refactoring} and the ``Gang of Four'' \tit{Design |
36d99783 EK |
494 | Patterns}\citing{designPatterns}. It is building on the refactoring |
495 | catalogue from Fowler's book, but is trying to bridge the gap between | |
496 | \emph{refactoring} and \emph{design patterns} by providing a series of | |
497 | higher-level composite refactorings, that makes code evolve toward or away | |
fe0a4c48 | 498 | from certain design patterns. The book is trying to build up the reader's |
36d99783 EK |
499 | intuition around \emph{why} one would want to use a particular design |
500 | pattern, and not just \emph{how}. The book is encouraging evolutionary | |
e123ab03 | 501 | design \see{relationToDesignPatterns}. |
d21ef41f | 502 | \end{description} |
3b7c1d90 | 503 | |
110dae91 | 504 | \section{Tool support (for Java)}\label{toolSupport} |
3ab3e132 | 505 | This section will briefly compare the refactoring support of the three IDEs |
fe0a4c48 EK |
506 | \name{Eclipse}\footnote{\url{http://www.eclipse.org/}}, \name{IntelliJ |
507 | IDEA}\footnote{The IDE under comparison is the \name{Community Edition}, | |
4e135659 | 508 | \url{http://www.jetbrains.com/idea/}} and |
fe0a4c48 | 509 | \name{NetBeans}\footnote{\url{https://netbeans.org/}}. These are the most |
4e135659 EK |
510 | popular Java IDEs\citing{javaReport2011}. |
511 | ||
512 | All three IDEs provide support for the most useful refactorings, like the | |
513 | different extract, move and rename refactorings. In fact, Java-targeted IDEs are | |
514 | known for their good refactoring support, so this did not appear as a big | |
515 | surprise. | |
516 | ||
517 | The IDEs seem to have excellent support for the \ExtractMethod refactoring, so | |
b4e539f7 EK |
518 | at least they have all passed the first ``refactoring |
519 | rubicon''\citing{fowlerRubicon2001,secondRubicon2012}. | |
4e135659 | 520 | |
fe0a4c48 EK |
521 | Regarding the \gloss{moveMethod} refactoring, the \name{Eclipse} and |
522 | \name{IntelliJ} IDEs do the job in very similar manners. In most situations they | |
523 | both do a satisfying job by producing the expected outcome. But they do nothing | |
524 | to check that the result does not break the semantics of the program | |
525 | \see{correctness}. | |
526 | The \name{NetBeans} IDE implements this refactoring in a somewhat | |
b4e539f7 EK |
527 | unsophisticated way. For starters, the refactoring's default destination for the |
528 | move, is the same class as the method already resides in, although it refuses to | |
529 | perform the refactoring if chosen. But the worst part is, that if moving the | |
530 | method \method{f} of the class \type{C} to the class \type{X}, it will break the | |
531 | code. The result is shown in \myref{lst:moveMethod_NetBeans}. | |
4e135659 | 532 | |
347ed677 EK |
533 | \begin{listing} |
534 | \begin{multicols}{2} | |
4e135659 EK |
535 | \begin{minted}[samepage]{java} |
536 | public class C { | |
537 | private X x; | |
538 | ... | |
539 | public void f() { | |
540 | x.m(); | |
541 | x.n(); | |
542 | } | |
543 | } | |
544 | \end{minted} | |
545 | ||
347ed677 | 546 | \columnbreak |
4e135659 EK |
547 | |
548 | \begin{minted}[samepage]{java} | |
549 | public class X { | |
550 | ... | |
551 | public void f(C c) { | |
552 | c.x.m(); | |
553 | c.x.n(); | |
554 | } | |
555 | } | |
556 | \end{minted} | |
347ed677 EK |
557 | \end{multicols} |
558 | \caption{Moving method \method{f} from \type{C} to \type{X}.} | |
559 | \label{lst:moveMethod_NetBeans} | |
560 | \end{listing} | |
4e135659 | 561 | |
fe0a4c48 | 562 | \name{NetBeans} will try to create code that call the methods \method{m} and \method{n} |
4e135659 | 563 | of \type{X} by accessing them through \var{c.x}, where \var{c} is a parameter of |
a1bafe90 EK |
564 | type \type{C} that is added the method \method{f} when it is moved. (This is |
565 | seldom the desired outcome of this refactoring, but ironically, this ``feature'' | |
fe0a4c48 | 566 | keeps \name{NetBeans} from breaking the code in the example from \myref{correctness}.) |
8b6b22c8 | 567 | If \var{c.x} for some reason is inaccessible to \type{X}, as in this case, the |
fe0a4c48 | 568 | refactoring breaks the code, and it will not compile. \name{NetBeans} presents a |
8b6b22c8 EK |
569 | preview of the refactoring outcome, but the preview does not catch it if the IDE |
570 | is about break the program. | |
4778044b | 571 | |
b4e539f7 | 572 | The IDEs under investigation seem to have fairly good support for primitive |
fe0a4c48 EK |
573 | refactorings, but what about more complex ones, such as |
574 | \gloss{extractClass}\citing{refactoring}? \name{IntelliJ} handles this in a | |
575 | fairly good manner, although, in the case of private methods, it leaves unused | |
a1bafe90 | 576 | methods behind. These are methods that delegate to a field with the type of the |
fe0a4c48 EK |
577 | new class, but are not used anywhere. \name{Eclipse} has added its own quirk to |
578 | the \refa{Extract Class} refactoring, and only allows for \emph{fields} to be | |
579 | moved to a new class, \emph{not methods}. This makes it effectively only | |
580 | extracting a data structure, and calling it \refa{Extract Class} is a little | |
b4e539f7 | 581 | misleading. One would often be better off with textual extract and paste than |
fe0a4c48 EK |
582 | using the \refa{Extract Class} refactoring in \name{Eclipse}. When it comes to |
583 | \name{NetBeans}, it does not even show an attempt on providing this refactoring. | |
4778044b EK |
584 | |
585 | \todoin{Visual Studio (C++/C\#), Smalltalk refactoring browser?, | |
4e135659 | 586 | second refactoring rubicon?} |
3b7c1d90 | 587 | |
36d99783 | 588 | \section{The relation to design patterns}\label{relationToDesignPatterns} |
4cb06723 | 589 | |
fe0a4c48 EK |
590 | Refactoring and design patterns have at least one thing in common, they are both |
591 | promoted by advocates of \emph{clean code}\citing{cleanCode} as fundamental | |
592 | tools on the road to more maintainable and extendable source code. | |
4cb06723 EK |
593 | |
594 | \begin{quote} | |
595 | Design patterns help you determine how to reorganize a design, and they can | |
596 | reduce the amount of refactoring you need to do | |
597 | later.~\cite[p.~353]{designPatterns} | |
598 | \end{quote} | |
599 | ||
a1bafe90 EK |
600 | Although sometimes associated with |
601 | over-engineering\citing{kerievsky2005,refactoring}, design patterns are in | |
602 | general assumed to be good for maintainability of source code. That may be | |
d1adbeef EK |
603 | because many of them are designed to support the \emph{open/closed principle} of |
604 | object-oriented programming. The principle was first formulated by Bertrand | |
605 | Meyer, the creator of the Eiffel programming language, like this: ``Modules | |
606 | should be both open and closed.''\citing{meyer1988} It has been popularized, | |
607 | with this as a common version: | |
608 | ||
609 | \begin{quote} | |
610 | Software entities (classes, modules, functions, etc.) should be open for | |
611 | extension, but closed for modification.\footnote{See | |
612 | \url{http://c2.com/cgi/wiki?OpenClosedPrinciple} or | |
613 | \url{https://en.wikipedia.org/wiki/Open/closed_principle}} | |
614 | \end{quote} | |
615 | ||
616 | Maintainability is often thought of as the ability to be able to introduce new | |
a1bafe90 | 617 | functionality without having to change too much of the old code. When |
d1adbeef EK |
618 | refactoring, the motivation is often to facilitate adding new functionality. It |
619 | is about factoring the old code in a way that makes the new functionality being | |
620 | able to benefit from the functionality already residing in a software system, | |
621 | without having to copy old code into new. Then, next time someone shall add new | |
a1bafe90 EK |
622 | functionality, it is less likely that the old code has to change. Assuming that |
623 | a design pattern is the best way to get rid of duplication and assist in | |
624 | implementing new functionality, it is reasonable to conclude that a design | |
625 | pattern often is the target of a series of refactorings. Having a repertoire of | |
626 | design patterns can also help in knowing when and how to refactor a program to | |
627 | make it reflect certain desired characteristics. | |
d1adbeef EK |
628 | |
629 | \begin{quote} | |
a1bafe90 | 630 | There is a natural relation between patterns and refactorings. Patterns are |
d1adbeef EK |
631 | where you want to be; refactorings are ways to get there from somewhere |
632 | else.~\cite[p.~107]{refactoring} | |
633 | \end{quote} | |
634 | ||
635 | This quote is wise in many contexts, but it is not always appropriate to say | |
a1bafe90 EK |
636 | ``Patterns are where you want to be\ldots''. \emph{Sometimes}, patterns are |
637 | where you want to be, but only because it will benefit your design. It is not | |
638 | true that one should always try to incorporate as many design patterns as | |
639 | possible into a program. It is not like they have intrinsic value. They only add | |
640 | value to a system when they support its design. Otherwise, the use of design | |
641 | patterns may only lead to a program that is more complex than necessary. | |
d1adbeef EK |
642 | |
643 | \begin{quote} | |
644 | The overuse of patterns tends to result from being patterns happy. We are | |
645 | \emph{patterns happy} when we become so enamored of patterns that we simply | |
646 | must use them in our code.~\cite[p.~24]{kerievsky2005} | |
647 | \end{quote} | |
648 | ||
649 | This can easily happen when relying largely on up-front design. Then it is | |
650 | natural, in the very beginning, to try to build in all the flexibility that one | |
651 | believes will be necessary throughout the lifetime of a software system. | |
652 | According to Joshua Kerievsky ``That sounds reasonable --- if you happen to be | |
653 | psychic.''~\cite[p.~1]{kerievsky2005} He is advocating what he believes is a | |
654 | better approach: To let software continually evolve. To start with a simple | |
655 | design that meets today's needs, and tackle future needs by refactoring to | |
656 | satisfy them. He believes that this is a more economic approach than investing | |
657 | time and money into a design that inevitably is going to change. By relying on | |
658 | continuously refactoring a system, its design can be made simpler without | |
659 | sacrificing flexibility. To be able to fully rely on this approach, it is of | |
e123ab03 | 660 | utter importance to have a reliable suit of tests to lean on \see{testing}. This |
d1adbeef EK |
661 | makes the design process more natural and less characterized by difficult |
662 | decisions that has to be made before proceeding in the process, and that is | |
663 | going to define a project for all of its unforeseeable future. | |
664 | ||
b289552b EK |
665 | \begin{comment} |
666 | ||
137e0e7b EK |
667 | \section{Classification of refactorings} |
668 | % only interesting refactorings | |
669 | % with 2 detailed examples? One for structured and one for intra-method? | |
670 | % Is replacing Bubblesort with Quick Sort considered a refactoring? | |
671 | ||
672 | \subsection{Structural refactorings} | |
673 | ||
f65da046 | 674 | \subsubsection{Primitive refactorings} |
137e0e7b EK |
675 | |
676 | % Composing Methods | |
677 | \explanation{Extract Method}{You have a code fragment that can be grouped | |
678 | together.}{Turn the fragment into a method whose name explains the purpose of | |
679 | the method.} | |
680 | ||
681 | \explanation{Inline Method}{A method's body is just as clear as its name.}{Put | |
682 | the method's body into the body of its callers and remove the method.} | |
683 | ||
684 | \explanation{Inline Temp}{You have a temp that is assigned to once with a simple | |
685 | expression, and the temp is getting in the way of other refactorings.}{Replace | |
686 | all references to that temp with the expression} | |
687 | ||
688 | % Moving Features Between Objects | |
689 | \explanation{Move Method}{A method is, or will be, using or used by more | |
690 | features of another class than the class on which it is defined.}{Create a new | |
691 | method with a similar body in the class it uses most. Either turn the old method | |
692 | into a simple delegation, or remove it altogether.} | |
693 | ||
694 | \explanation{Move Field}{A field is, or will be, used by another class more than | |
695 | the class on which it is defined}{Create a new field in the target class, and | |
696 | change all its users.} | |
697 | ||
698 | % Organizing Data | |
699 | \explanation{Replace Magic Number with Symbolic Constant}{You have a literal | |
700 | number with a particular meaning.}{Create a constant, name it after the meaning, | |
701 | and replace the number with it.} | |
702 | ||
703 | \explanation{Encapsulate Field}{There is a public field.}{Make it private and | |
704 | provide accessors.} | |
705 | ||
706 | \explanation{Replace Type Code with Class}{A class has a numeric type code that | |
8fae7b44 | 707 | does not affect its behavior.}{Replace the number with a new class.} |
137e0e7b EK |
708 | |
709 | \explanation{Replace Type Code with Subclasses}{You have an immutable type code | |
8fae7b44 | 710 | that affects the behavior of a class.}{Replace the type code with subclasses.} |
137e0e7b EK |
711 | |
712 | \explanation{Replace Type Code with State/Strategy}{You have a type code that | |
8fae7b44 | 713 | affects the behavior of a class, but you cannot use subclassing.}{Replace the |
137e0e7b EK |
714 | type code with a state object.} |
715 | ||
716 | % Simplifying Conditional Expressions | |
717 | \explanation{Consolidate Duplicate Conditional Fragments}{The same fragment of | |
8fae7b44 | 718 | code is in all branches of a conditional expression.}{Move it outside of the |
137e0e7b EK |
719 | expression.} |
720 | ||
721 | \explanation{Remove Control Flag}{You have a variable that is acting as a | |
722 | control flag fro a series of boolean expressions.}{Use a break or return | |
723 | instead.} | |
724 | ||
725 | \explanation{Replace Nested Conditional with Guard Clauses}{A method has | |
8fae7b44 | 726 | conditional behavior that does not make clear the normal path of |
137e0e7b EK |
727 | execution.}{Use guard clauses for all special cases.} |
728 | ||
8fae7b44 | 729 | \explanation{Introduce Null Object}{You have repeated checks for a null |
137e0e7b EK |
730 | value.}{Replace the null value with a null object.} |
731 | ||
732 | \explanation{Introduce Assertion}{A section of code assumes something about the | |
733 | state of the program.}{Make the assumption explicit with an assertion.} | |
734 | ||
735 | % Making Method Calls Simpler | |
736 | \explanation{Rename Method}{The name of a method does not reveal its | |
737 | purpose.}{Change the name of the method} | |
738 | ||
739 | \explanation{Add Parameter}{A method needs more information from its | |
740 | caller.}{Add a parameter for an object that can pass on this information.} | |
741 | ||
742 | \explanation{Remove Parameter}{A parameter is no longer used by the method | |
743 | body.}{Remove it.} | |
744 | ||
745 | %\explanation{Parameterize Method}{Several methods do similar things but with | |
746 | %different values contained in the method.}{Create one method that uses a | |
747 | %parameter for the different values.} | |
748 | ||
749 | \explanation{Preserve Whole Object}{You are getting several values from an | |
750 | object and passing these values as parameters in a method call.}{Send the whole | |
751 | object instead.} | |
752 | ||
753 | \explanation{Remove Setting Method}{A field should be set at creation time and | |
754 | never altered.}{Remove any setting method for that field.} | |
755 | ||
756 | \explanation{Hide Method}{A method is not used by any other class.}{Make the | |
757 | method private.} | |
758 | ||
8fae7b44 EK |
759 | \explanation{Replace Constructor with Factory Method}{You want to do more than |
760 | simple construction when you create an object}{Replace the constructor with a | |
137e0e7b EK |
761 | factory method.} |
762 | ||
763 | % Dealing with Generalization | |
8fae7b44 | 764 | \explanation{Pull Up Field}{Two subclasses have the same field.}{Move the field |
137e0e7b EK |
765 | to the superclass.} |
766 | ||
767 | \explanation{Pull Up Method}{You have methods with identical results on | |
768 | subclasses.}{Move them to the superclass.} | |
769 | ||
8fae7b44 | 770 | \explanation{Push Down Method}{Behavior on a superclass is relevant only for |
137e0e7b EK |
771 | some of its subclasses.}{Move it to those subclasses.} |
772 | ||
773 | \explanation{Push Down Field}{A field is used only by some subclasses.}{Move the | |
774 | field to those subclasses} | |
775 | ||
776 | \explanation{Extract Interface}{Several clients use the same subset of a class's | |
8fae7b44 | 777 | interface, or two classes have part of their interfaces in common.}{Extract the |
137e0e7b EK |
778 | subset into an interface.} |
779 | ||
780 | \explanation{Replace Inheritance with Delegation}{A subclass uses only part of a | |
781 | superclasses interface or does not want to inherit data.}{Create a field for the | |
782 | superclass, adjust methods to delegate to the superclass, and remove the | |
783 | subclassing.} | |
784 | ||
785 | \explanation{Replace Delegation with Inheritance}{You're using delegation and | |
786 | are often writing many simple delegations for the entire interface}{Make the | |
787 | delegating class a subclass of the delegate.} | |
788 | ||
789 | \subsubsection{Composite refactorings} | |
790 | ||
791 | % Composing Methods | |
792 | % \explanation{Replace Method with Method Object}{}{} | |
793 | ||
794 | % Moving Features Between Objects | |
795 | \explanation{Extract Class}{You have one class doing work that should be done by | |
796 | two}{Create a new class and move the relevant fields and methods from the old | |
797 | class into the new class.} | |
798 | ||
799 | \explanation{Inline Class}{A class isn't doing very much.}{Move all its features | |
800 | into another class and delete it.} | |
801 | ||
802 | \explanation{Hide Delegate}{A client is calling a delegate class of an | |
803 | object.}{Create Methods on the server to hide the delegate.} | |
804 | ||
805 | \explanation{Remove Middle Man}{A class is doing to much simple delegation.}{Get | |
806 | the client to call the delegate directly.} | |
807 | ||
808 | % Organizing Data | |
809 | \explanation{Replace Data Value with Object}{You have a data item that needs | |
8fae7b44 | 810 | additional data or behavior.}{Turn the data item into an object.} |
137e0e7b EK |
811 | |
812 | \explanation{Change Value to Reference}{You have a class with many equal | |
813 | instances that you want to replace with a single object.}{Turn the object into a | |
814 | reference object.} | |
815 | ||
816 | \explanation{Encapsulate Collection}{A method returns a collection}{Make it | |
8fae7b44 | 817 | return a read-only view and provide add/remove methods.} |
137e0e7b EK |
818 | |
819 | % \explanation{Replace Array with Object}{}{} | |
820 | ||
821 | \explanation{Replace Subclass with Fields}{You have subclasses that vary only in | |
822 | methods that return constant data.}{Change the methods to superclass fields and | |
823 | eliminate the subclasses.} | |
824 | ||
825 | % Simplifying Conditional Expressions | |
826 | \explanation{Decompose Conditional}{You have a complicated conditional | |
827 | (if-then-else) statement.}{Extract methods from the condition, then part, an | |
828 | else part.} | |
829 | ||
830 | \explanation{Consolidate Conditional Expression}{You have a sequence of | |
831 | conditional tests with the same result.}{Combine them into a single conditional | |
832 | expression and extract it.} | |
833 | ||
834 | \explanation{Replace Conditional with Polymorphism}{You have a conditional that | |
8fae7b44 | 835 | chooses different behavior depending on the type of an object.}{Move each leg |
137e0e7b EK |
836 | of the conditional to an overriding method in a subclass. Make the original |
837 | method abstract.} | |
838 | ||
839 | % Making Method Calls Simpler | |
840 | \explanation{Replace Parameter with Method}{An object invokes a method, then | |
841 | passes the result as a parameter for a method. The receiver can also invoke this | |
842 | method.}{Remove the parameter and let the receiver invoke the method.} | |
843 | ||
844 | \explanation{Introduce Parameter Object}{You have a group of parameters that | |
845 | naturally go together.}{Replace them with an object.} | |
846 | ||
847 | % Dealing with Generalization | |
848 | \explanation{Extract Subclass}{A class has features that are used only in some | |
849 | instances.}{Create a subclass for that subset of features.} | |
850 | ||
851 | \explanation{Extract Superclass}{You have two classes with similar | |
852 | features.}{Create a superclass and move the common features to the | |
853 | superclass.} | |
854 | ||
855 | \explanation{Collapse Hierarchy}{A superclass and subclass are not very | |
856 | different.}{Merge them together.} | |
857 | ||
858 | \explanation{Form Template Method}{You have two methods in subclasses that | |
859 | perform similar steps in the same order, yet the steps are different.}{Get the | |
860 | steps into methods with the same signature, so that the original methods become | |
861 | the same. Then you can pull them up.} | |
862 | ||
863 | ||
864 | \subsection{Functional refactorings} | |
865 | ||
866 | \explanation{Substitute Algorithm}{You want to replace an algorithm with one | |
867 | that is clearer.}{Replace the body of the method with the new algorithm.} | |
00aa0588 | 868 | |
b289552b | 869 | \end{comment} |
00aa0588 EK |
870 | |
871 | \section{The impact on software quality} | |
872 | ||
a1bafe90 | 873 | \subsection{What is software quality?} |
00aa0588 | 874 | The term \emph{software quality} has many meanings. It all depends on the |
9a55a5bc | 875 | context we put it in. If we look at it with the eyes of a software developer, it |
a1bafe90 | 876 | usually means that the software is easily maintainable and testable, or in other |
9a55a5bc EK |
877 | words, that it is \emph{well designed}. This often correlates with the |
878 | management scale, where \emph{keeping the schedule} and \emph{customer | |
137e0e7b EK |
879 | satisfaction} is at the center. From the customers point of view, in addition to |
880 | good usability, \emph{performance} and \emph{lack of bugs} is always | |
881 | appreciated, measurements that are also shared by the software developer. (In | |
882 | addition, such things as good documentation could be measured, but this is out | |
883 | of the scope of this document.) | |
9a55a5bc | 884 | |
00aa0588 | 885 | \subsection{The impact on performance} |
9a55a5bc | 886 | \begin{quote} |
a1bafe90 EK |
887 | Refactoring certainly will make software go more slowly\footnote{With todays |
888 | compiler optimization techniques and performance tuning of e.g. the Java | |
889 | virtual machine, the penalties of object creation and method calls are | |
890 | debatable.}, but it also makes the software more amenable to performance | |
891 | tuning.~\cite[p.~69]{refactoring} | |
9a55a5bc | 892 | \end{quote} |
ee45c41f EK |
893 | |
894 | \noindent There is a common belief that refactoring compromises performance, due | |
895 | to increased degree of indirection and that polymorphism is slower than | |
9a55a5bc EK |
896 | conditionals. |
897 | ||
b5c7bb1b | 898 | In a survey, Demeyer\citing{demeyer2002} disproves this view in the case of |
a1bafe90 | 899 | polymorphism. He did an experiment on, what he calls, ``Transform Self Type |
9a55a5bc EK |
900 | Checks'' where you introduce a new polymorphic method and a new class hierarchy |
901 | to get rid of a class' type checking of a ``type attribute``. He uses this kind | |
902 | of transformation to represent other ways of replacing conditionals with | |
903 | polymorphism as well. The experiment is performed on the C++ programming | |
a1bafe90 EK |
904 | language and with three different compilers and platforms. Demeyer concludes |
905 | that, with compiler optimization turned on, polymorphism beats middle to large | |
906 | sized if-statements and does as well as case-statements. (In accordance with | |
907 | his hypothesis, due to similarities between the way C++ handles polymorphism and | |
908 | case-statements.) | |
ee45c41f | 909 | |
9a55a5bc EK |
910 | \begin{quote} |
911 | The interesting thing about performance is that if you analyze most programs, | |
b5c7bb1b | 912 | you find that they waste most of their time in a small fraction of the |
4cb06723 | 913 | code.~\cite[p.~70]{refactoring} |
9a55a5bc | 914 | \end{quote} |
9a55a5bc | 915 | |
ee45c41f EK |
916 | \noindent So, although an increased amount of method calls could potentially |
917 | slow down programs, one should avoid premature optimization and sacrificing good | |
fe0a4c48 EK |
918 | design, leaving the performance tuning until after \gloss{profiling} the |
919 | software and having isolated the actual problem areas. | |
00aa0588 | 920 | |
0d7fbd88 | 921 | \section{Composite refactorings}\label{compositeRefactorings} |
f3a108c3 EK |
922 | \todo{motivation, examples, manual vs automated?, what about refactoring in a |
923 | very large code base?} | |
6065c96c | 924 | Generally, when thinking about refactoring, at the mechanical level, there are |
f65da046 | 925 | essentially two kinds of refactorings. There are the \emph{primitive} |
a1bafe90 | 926 | refactorings, and the \emph{composite} refactorings. |
6065c96c | 927 | |
6c51af15 EK |
928 | \definition{A \emph{primitive refactoring} is a refactoring that cannot be |
929 | expressed in terms of other refactorings.} | |
f65da046 | 930 | |
fe0a4c48 | 931 | \noindent Examples are the \refa{Pull Up Field} and \refa{Pull Up |
a1bafe90 | 932 | Method} refactorings\citing{refactoring}, that move members up in their class |
ee45c41f EK |
933 | hierarchies. |
934 | ||
6c51af15 EK |
935 | \definition{A \emph{composite refactoring} is a refactoring that can be |
936 | expressed in terms of two or more other refactorings.} | |
f65da046 | 937 | |
fe0a4c48 | 938 | \noindent An example of a composite refactoring is the \refa{Extract |
b5c7bb1b EK |
939 | Superclass} refactoring\citing{refactoring}. In its simplest form, it is composed |
940 | of the previously described primitive refactorings, in addition to the | |
fe0a4c48 | 941 | \refa{Pull Up Constructor Body} refactoring\citing{refactoring}. It works |
b5c7bb1b | 942 | by creating an abstract superclass that the target class(es) inherits from, then |
fe0a4c48 EK |
943 | by applying \refa{Pull Up Field}, \refa{Pull Up Method} and |
944 | \refa{Pull Up Constructor Body} on the members that are to be members of | |
f5fb40e4 EK |
945 | the new superclass. If there are multiple classes in play, their interfaces may |
946 | need to be united with the help of some rename refactorings, before extracting | |
fe0a4c48 | 947 | the superclass. For an overview of the \refa{Extract Superclass} |
8b6b22c8 | 948 | refactoring, see \myref{fig:extractSuperclass}. |
6065c96c | 949 | |
ddcea0b5 EK |
950 | \begin{figure}[h] |
951 | \centering | |
faa9f4f3 | 952 | \includegraphics[angle=270,width=\linewidth]{extractSuperclassItalic.pdf} |
f5fb40e4 | 953 | \caption{The Extract Superclass refactoring, with united interfaces.} |
ddcea0b5 EK |
954 | \label{fig:extractSuperclass} |
955 | \end{figure} | |
6065c96c EK |
956 | |
957 | \section{Manual vs. automated refactorings} | |
0d7fbd88 | 958 | Refactoring is something every programmer does, even if \heshe does not known |
f65da046 EK |
959 | the term \emph{refactoring}. Every refinement of source code that does not alter |
960 | the program's behavior is a refactoring. For small refactorings, such as | |
0d7fbd88 | 961 | \ExtractMethod, executing it manually is a manageable task, but is still prone |
a1bafe90 | 962 | to errors. Getting it right the first time is not easy, considering the method |
f65da046 EK |
963 | signature and all the other aspects of the refactoring that has to be in place. |
964 | ||
f5fb40e4 EK |
965 | Consider the renaming of classes, methods and fields. For complex programs these |
966 | refactorings are almost impossible to get right. Attacking them with textual | |
967 | search and replace, or even regular expressions, will fall short on these tasks. | |
968 | Then it is crucial to have proper tool support that can perform them | |
969 | automatically. Tools that can parse source code and thus have semantic knowledge | |
970 | about which occurrences of which names belong to what construct in the program. | |
971 | For even trying to perform one of these complex task manually, one would have to | |
972 | be very confident on the existing test suite \see{testing}. | |
00aa0588 | 973 | |
19c4f27d | 974 | \section{Correctness of refactorings}\label{correctness} |
f65da046 | 975 | For automated refactorings to be truly useful, they must show a high degree of |
f5fb40e4 EK |
976 | behavior preservation. This last sentence might seem obvious, but there are |
977 | examples of refactorings in existing tools that break programs. In an ideal | |
978 | world, every automated refactoring would be ``complete'', in the sense that it | |
979 | would never break a program. In an ideal world, every program would also be free | |
980 | from bugs. In modern IDEs the implemented automated refactorings are working for | |
981 | \emph{most} cases, that is enough for making them useful. | |
982 | ||
983 | I will now present an example of a \emph{corner case} where a program breaks | |
984 | when a refactoring is applied. The example shows an \ExtractMethod refactoring | |
985 | followed by a \MoveMethod refactoring that breaks a program in both the | |
fe0a4c48 | 986 | \name{Eclipse} and \name{IntelliJ} IDEs\footnote{The \name{NetBeans} IDE handles this |
3ab3e132 | 987 | particular situation without altering the program's behavior, mainly because |
fe0a4c48 | 988 | its \refa{Move Method} refactoring implementation is a bit flawed in other ways |
f5fb40e4 EK |
989 | \see{toolSupport}.}. The target and the destination for the composed |
990 | refactoring is shown in \myref{lst:correctnessExtractAndMove}. Note that the | |
991 | method \method{m(C c)} of class \type{X} assigns to the field \var{x} of the | |
992 | argument \var{c} that has type \type{C}. | |
993 | ||
994 | \begin{listing}[h] | |
995 | \begin{multicols}{2} | |
996 | \begin{minted}[linenos]{java} | |
997 | // Refactoring target | |
ddcea0b5 | 998 | public class C { |
f5fb40e4 | 999 | public X x = new X(); |
ee45c41f | 1000 | |
f5fb40e4 EK |
1001 | public void f() { |
1002 | x.m(this); | |
1003 | // Not the same x | |
1004 | x.n(); | |
1005 | } | |
ddcea0b5 EK |
1006 | } |
1007 | \end{minted} | |
ee45c41f | 1008 | |
f5fb40e4 | 1009 | \columnbreak |
ee45c41f | 1010 | |
f5fb40e4 EK |
1011 | \begin{minted}[]{java} |
1012 | // Method destination | |
ee45c41f | 1013 | public class X { |
f5fb40e4 EK |
1014 | public void m(C c) { |
1015 | c.x = new X(); | |
1016 | // If m is called from | |
1017 | // c, then c.x no longer | |
1018 | // equals 'this' | |
1019 | } | |
1020 | public void n() {} | |
ee45c41f EK |
1021 | } |
1022 | \end{minted} | |
f5fb40e4 EK |
1023 | \end{multicols} |
1024 | \caption{The target and the destination for the composition of the Extract | |
fe0a4c48 | 1025 | Method and \refa{Move Method} refactorings.} |
f5fb40e4 EK |
1026 | \label{lst:correctnessExtractAndMove} |
1027 | \end{listing} | |
ee45c41f | 1028 | |
f5fb40e4 EK |
1029 | |
1030 | The refactoring sequence works by extracting line 6 through 8 from the original | |
3510e539 | 1031 | class \type{C} into a method \method{f} with the statements from those lines as |
f5fb40e4 EK |
1032 | its method body (but with the comment left out, since it will no longer hold any |
1033 | meaning). The method is then moved to the class \type{X}. The result is shown | |
1034 | in \myref{lst:correctnessExtractAndMoveResult}. | |
ee45c41f | 1035 | |
f5fb40e4 EK |
1036 | Before the refactoring, the methods \method{m} and \method{n} of class \type{X} |
1037 | are called on different object instances (see line 6 and 8 of the original class | |
1038 | \type{C} in \cref{lst:correctnessExtractAndMove}). After the refactoring, they | |
1039 | are called on the same object, and the statement on line | |
1040 | 3 of class \type{X} (in \cref{lst:correctnessExtractAndMoveResult}) no longer | |
1041 | has the desired effect in our example. The method \method{f} of class \type{C} | |
1042 | is now calling the method \method{f} of class \type{X} (see line 5 of class | |
1043 | \type{C} in \cref{lst:correctnessExtractAndMoveResult}), and the program now | |
1044 | behaves different than before. | |
1045 | ||
1046 | \begin{listing}[h] | |
1047 | \begin{multicols}{2} | |
1048 | \begin{minted}[linenos]{java} | |
ee45c41f EK |
1049 | public class C { |
1050 | public X x = new X(); | |
1051 | ||
1052 | public void f() { | |
1053 | x.f(this); | |
1054 | } | |
1055 | } | |
1056 | \end{minted} | |
1057 | ||
f5fb40e4 EK |
1058 | \columnbreak |
1059 | ||
1060 | \begin{minted}[linenos]{java} | |
ee45c41f EK |
1061 | public class X { |
1062 | public void m(C c) { | |
1063 | c.x = new X(); | |
1064 | } | |
1065 | public void n() {} | |
f5fb40e4 EK |
1066 | // Extracted and |
1067 | // moved method | |
ee45c41f EK |
1068 | public void f(C c) { |
1069 | m(c); | |
1070 | n(); | |
1071 | } | |
1072 | } | |
1073 | \end{minted} | |
f5fb40e4 EK |
1074 | \end{multicols} |
1075 | \caption{The result of the composed refactoring.} | |
1076 | \label{lst:correctnessExtractAndMoveResult} | |
1077 | \end{listing} | |
ddcea0b5 | 1078 | |
aa1e3779 EK |
1079 | The bug introduced in the previous example is of such a nature\footnote{Caused |
1080 | by aliasing. See \url{https://en.wikipedia.org/wiki/Aliasing_(computing)}} | |
1081 | that it is very difficult to spot if the refactored code is not covered by | |
1082 | tests. It does not generate compilation errors, and will thus only result in | |
1083 | a runtime error or corrupted data, which might be hard to detect. | |
19c4f27d | 1084 | |
29f39f29 | 1085 | \section{Refactoring and the importance of testing}\label{testing} |
19c4f27d EK |
1086 | \begin{quote} |
1087 | If you want to refactor, the essential precondition is having solid | |
1088 | tests.\citing{refactoring} | |
1089 | \end{quote} | |
1090 | ||
29f39f29 EK |
1091 | When refactoring, there are roughly three classes of errors that can be made. |
1092 | The first class of errors are the ones that make the code unable to compile. | |
1093 | These \emph{compile-time} errors are of the nicer kind. They flash up at the | |
1094 | moment they are made (at least when using an IDE), and are usually easy to fix. | |
1095 | The second class are the \emph{runtime} errors. Although they take a bit longer | |
1096 | to surface, they usually manifest after some time in an illegal argument | |
1097 | exception, null pointer exception or similar during the program execution. | |
1098 | These kind of errors are a bit harder to handle, but at least they will show, | |
1099 | eventually. Then there are the \emph{behavior-changing} errors. These errors are | |
1100 | of the worst kind. They do not show up during compilation and they do not turn | |
1101 | on a blinking red light during runtime either. The program can seem to work | |
1102 | perfectly fine with them in play, but the business logic can be damaged in ways | |
1103 | that will only show up over time. | |
1104 | ||
1105 | For discovering runtime errors and behavior changes when refactoring, it is | |
1106 | essential to have good test coverage. Testing in this context means writing | |
1107 | automated tests. Manual testing may have its uses, but when refactoring, it is | |
1108 | automated unit testing that dominate. For discovering behavior changes it is | |
1109 | especially important to have tests that cover potential problems, since these | |
1110 | kind of errors does not reveal themselves. | |
1111 | ||
1112 | Unit testing is not a way to \emph{prove} that a program is correct, but it is a | |
3ab3e132 | 1113 | way to make you confident that it \emph{probably} works as desired. In the |
29f39f29 EK |
1114 | context of test driven development (commonly known as TDD), the tests are even a |
1115 | way to define how the program is \emph{supposed} to work. It is then, by | |
1116 | definition, working if the tests are passing. | |
19c4f27d EK |
1117 | |
1118 | If the test coverage for a code base is perfect, then it should, theoretically, | |
29f39f29 EK |
1119 | be risk-free to perform refactorings on it. This is why automated tests and |
1120 | refactoring are such a great match. | |
f65da046 | 1121 | |
b5d53f51 | 1122 | \subsection{Testing the code from correctness section} |
29f39f29 EK |
1123 | The worst thing that can happen when refactoring is to introduce changes to the |
1124 | behavior of a program, as in the example on \myref{correctness}. This example | |
1125 | may be trivial, but the essence is clear. The only problem with the example is | |
1126 | that it is not clear how to create automated tests for it, without changing it | |
1127 | in intrusive ways. | |
1128 | ||
20bcc7bf | 1129 | Unit tests, as they are known from the different \glosspl{xUnit} around, are |
29f39f29 EK |
1130 | only suitable to test the \emph{result} of isolated operations. They can not |
1131 | easily (if at all) observe the \emph{history} of a program. | |
b5d53f51 | 1132 | |
a13e5650 | 1133 | This problem is still open. |
116805bf | 1134 | |
a13e5650 EK |
1135 | \todoin{Write?} |
1136 | \begin{comment} | |
116805bf EK |
1137 | |
1138 | Assuming a sequential (non-concurrent) program: | |
1139 | ||
1140 | \begin{minted}{java} | |
1141 | tracematch (C c, X x) { | |
1142 | sym m before: | |
1143 | call(* X.m(C)) && args(c) && cflow(within(C)); | |
1144 | sym n before: | |
1145 | call(* X.n()) && target(x) && cflow(within(C)); | |
1146 | sym setCx after: | |
1147 | set(C.x) && target(c) && !cflow(m); | |
1148 | ||
1149 | m n | |
1150 | ||
1151 | { assert x == c.x; } | |
1152 | } | |
1153 | \end{minted} | |
1154 | ||
1155 | %\begin{minted}{java} | |
1156 | %tracematch (X x1, X x2) { | |
1157 | % sym m before: | |
1158 | % call(* X.m(C)) && target(x1); | |
1159 | % sym n before: | |
1160 | % call(* X.n()) && target(x2); | |
1161 | % sym setX after: | |
1162 | % set(C.x) && !cflow(m) && !cflow(n); | |
1163 | % | |
1164 | % m n | |
1165 | % | |
1166 | % { assert x1 != x2; } | |
1167 | %} | |
1168 | %\end{minted} | |
a13e5650 | 1169 | \end{comment} |
116805bf | 1170 | |
222d172b EK |
1171 | |
1172 | \chapter{The Project} | |
1173 | ||
1174 | \todoin{Moved from introduction to here. Rewrite and make problem statement from | |
1175 | it:} | |
a13e5650 | 1176 | The aim of this master project will be to investigate the relationship between a |
b5d53f51 EK |
1177 | composite refactoring composed of the \ExtractMethod and \MoveMethod |
1178 | refactorings, and its impact on one or more software metrics. | |
1179 | ||
a13e5650 EK |
1180 | The composition of the \ExtractMethod and \MoveMethod refactorings springs |
1181 | naturally out of the need to move procedures closer to the data they manipulate. | |
1182 | This composed refactoring is not well described in the literature, but it is | |
1183 | implemented in at least one tool called | |
fe0a4c48 EK |
1184 | \name{CodeRush}\footnote{\url{https://help.devexpress.com/\#CodeRush/CustomDocument3519}}, |
1185 | that is an extension for \name{MS Visual | |
b5d53f51 | 1186 | Studio}\footnote{\url{http://www.visualstudio.com/}}. In CodeRush it is called |
fe0a4c48 | 1187 | \refa{Extract Method to |
b5d53f51 EK |
1188 | Type}\footnote{\url{https://help.devexpress.com/\#CodeRush/CustomDocument6710}}, |
1189 | but I choose to call it \ExtractAndMoveMethod, since I feel it better | |
1190 | communicates which primitive refactorings it is composed of. | |
1191 | ||
fe0a4c48 | 1192 | For the metrics, I will at least measure the \metr{Coupling between object |
b5d53f51 | 1193 | classes} (CBO) metric that is described by Chidamber and Kemerer in their |
fe0a4c48 | 1194 | article \tit{A Metrics Suite for Object Oriented |
b5d53f51 EK |
1195 | Design}\citing{metricsSuite1994}. |
1196 | ||
1197 | The project will then consist in implementing the \ExtractAndMoveMethod | |
1198 | refactoring, as well as executing it over a larger code base. Then the effect of | |
1199 | the change must be measured by calculating the chosen software metrics both | |
958f7baf EK |
1200 | before and after the execution. To be able to execute the refactoring |
1201 | automatically I have to make it analyze code to determine the best selections to | |
1202 | extract into new methods. | |
3b7c1d90 | 1203 | \section{The problem statement} |
70905ddc EK |
1204 | \todoin{write/move} |
1205 | ||
04e21f15 EK |
1206 | \section{The refactorings} |
1207 | ||
1208 | \subsection{The Extract Method refactoring} | |
1209 | The \refa{Extract Method} refactoring is used to extract a fragment of code | |
1210 | from its context and into a new method. A call to the new method is inlined | |
1211 | where the fragment was before. It is used to break code into logical units, | |
1212 | with names that explain their purpose | |
1213 | \todoin{Refine} | |
1214 | \missingfigure{Explaining the Extract Method refactoring} | |
1215 | ||
1216 | \subsection{The Move Method refactoring} | |
1217 | The \refa{Move Method} refactoring is used to move a method from one class to | |
1218 | another. This is useful if the method is using more features of another class | |
1219 | than of the class which it is currently defined. | |
1220 | \todoin{Refine} | |
1221 | \missingfigure{Explaining the Move Method refactoring} | |
1222 | ||
1223 | \subsection{The Extract and Move Method refactoring} | |
1224 | \todoin{Write} | |
1225 | \missingfigure{Explaining the Extract and Move Method refactoring} | |
1226 | ||
1227 | ||
3f929fcc | 1228 | \section{Choosing the target language} |
70905ddc EK |
1229 | Choosing which programming language the code that shall be manipulated shall be |
1230 | written in, is not a very difficult task. We choose to limit the possible | |
1231 | languages to the object-oriented programming languages, since most of the | |
1232 | terminology and literature regarding refactoring comes from the world of | |
1233 | object-oriented programming. In addition, the language must have existing tool | |
1234 | support for refactoring. | |
1235 | ||
fe0a4c48 | 1236 | The \name{Java} programming language\footnote{\url{https://www.java.com/}} is |
70905ddc EK |
1237 | the dominating language when it comes to example code in the literature of |
1238 | refactoring, and is thus a natural choice. Java is perhaps, currently the most | |
fe0a4c48 | 1239 | influential programming language in the world, with its \name{Java Virtual |
70905ddc EK |
1240 | Machine} that runs on all of the most popular architectures and also supports |
1241 | dozens of other programming languages\footnote{They compile to java bytecode.}, | |
fe0a4c48 | 1242 | with \name{Scala}, \name{Clojure} and \name{Groovy} as the most prominent ones. |
70905ddc EK |
1243 | Java is currently the language that every other programming language is compared |
1244 | against. It is also the primary programming language for the author of this | |
1245 | thesis. | |
3f929fcc EK |
1246 | |
1247 | \section{Choosing the tools} | |
3ab3e132 | 1248 | When choosing a tool for manipulating Java, there are certain criteria that |
3f929fcc EK |
1249 | have to be met. First of all, the tool should have some existing refactoring |
1250 | support that this thesis can build upon. Secondly it should provide some kind of | |
1251 | framework for parsing and analyzing Java source code. Third, it should itself be | |
1252 | open source. This is both because of the need to be able to browse the code for | |
1253 | the existing refactorings that is contained in the tool, and also because open | |
1254 | source projects hold value in them selves. Another important aspect to consider | |
1255 | is that open source projects of a certain size, usually has large communities of | |
3ab3e132 EK |
1256 | people connected to them, that are committed to answering questions regarding the |
1257 | use and misuse of the products, that to a large degree is made by the community | |
3f929fcc EK |
1258 | itself. |
1259 | ||
3ab3e132 | 1260 | There is a certain class of tools that meet these criteria, namely the class of |
3f929fcc | 1261 | \emph{IDEs}\footnote{\emph{Integrated Development Environment}}. These are |
3ab3e132 | 1262 | programs that is meant to support the whole production cycle of a computer |
3f929fcc EK |
1263 | program, and the most popular IDEs that support Java, generally have quite good |
1264 | refactoring support. | |
1265 | ||
fe0a4c48 EK |
1266 | The main contenders for this thesis is the \name{Eclipse IDE}, with the |
1267 | \name{Java development tools} (JDT), the \name{IntelliJ IDEA Community Edition} | |
1268 | and the \name{NetBeans IDE} \see{toolSupport}. \name{Eclipse} and | |
1269 | \name{NetBeans} are both free, open source and community driven, while the | |
1270 | \name{IntelliJ IDEA} has an open sourced community edition that is free of | |
1271 | charge, but also offer an \name{Ultimate Edition} with an extended set of | |
1272 | features, at additional cost. All three IDEs supports adding plugins to extend | |
1273 | their functionality and tools that can be used to parse and analyze Java source | |
1274 | code. But one of the IDEs stand out as a favorite, and that is the \name{Eclipse | |
1275 | IDE}. This is the most popular\citing{javaReport2011} among them and seems to be | |
1276 | de facto standard IDE for Java development regardless of platform. | |
4e135659 | 1277 | |
a5317dcf | 1278 | \section{Source code organization} |
2bb37e5e | 1279 | All the parts of this master project is under version control with |
fe0a4c48 | 1280 | \name{Git}\footnote{\url{http://git-scm.com/}}. |
2bb37e5e | 1281 | |
fe0a4c48 EK |
1282 | The software written is organized as some \name{Eclipse} plugins. Writing a plugin is |
1283 | the natural way to utilize the API of \name{Eclipse}. This also makes it possible to | |
0bd768f3 EK |
1284 | provide a user interface to manually run operations on selections in program |
1285 | source code or whole projects/packages. | |
1286 | ||
fe0a4c48 | 1287 | When writing a plugin in \name{Eclipse}, one has access to resources such as the |
0bd768f3 EK |
1288 | current workspace, the open editor and the current selection. |
1289 | ||
a5317dcf EK |
1290 | The thesis work is contained in the following Eclipse projects: |
1291 | ||
1292 | \begin{description} | |
f1b6174d EK |
1293 | \item[no.uio.ifi.refaktor] \hfill \\ This is the main Eclipse plugin |
1294 | project, and contains all of the business logic for the plugin. | |
a5317dcf EK |
1295 | |
1296 | \item[no.uio.ifi.refaktor.tests] \hfill \\ | |
1297 | This project contains the tests for the main plugin. | |
1298 | ||
1299 | \item[no.uio.ifi.refaktor.examples] \hfill \\ | |
1300 | Contains example code used in testing. It also contains code for managing | |
1301 | this example code, such as creating an Eclipse project from it before a test | |
1302 | run. | |
1303 | ||
1304 | \item[no.uio.ifi.refaktor.benchmark] \hfill \\ | |
1305 | This project contains code for running search based versions of the | |
1306 | composite refactoring over selected Eclipse projects. | |
1307 | ||
1308 | \item[no.uio.ifi.refaktor.releng] \hfill \\ | |
1309 | Contains the rmap, queries and target definitions needed by by Buckminster | |
1310 | on the Jenkins continuous integration server. | |
1311 | ||
1312 | \end{description} | |
1313 | ||
f1b6174d EK |
1314 | \subsection{The no.uio.ifi.refaktor project} |
1315 | ||
1316 | \subsubsection{no.uio.ifi.refaktor.analyze} | |
1317 | This package, and its subpackages, contains code that is used for analyzing Java | |
1318 | source code. The most important subpackages are presented below. | |
1319 | ||
1320 | \begin{description} | |
1321 | \item[no.uio.ifi.refaktor.analyze.analyzers] \hfill \\ | |
1322 | This package contains source code analyzers. These are usually responsible | |
1323 | for analyzing text selections or running specialized analyzers for different | |
1324 | kinds of entities. Their structure are often hierarchical. This means that | |
1325 | you have an analyzer for text selections, that in turn is utilized by an | |
1326 | analyzer that analyzes all the selections of a method. Then there are | |
1327 | analyzers for analyzing all the methods of a type, all the types of a | |
1328 | compilation unit, all the compilation units of a package, and, at last, all | |
1329 | of the packages in a project. | |
1330 | ||
1331 | \item[no.uio.ifi.refaktor.analyze.checkers] \hfill \\ | |
1332 | A package containing checkers. The checkers are classes used to validate | |
1333 | that a selection can be further analyzed and chosen as a candidate for a | |
1334 | refactoring. Invalidating properties can be such as usage of inner classes | |
1335 | or the need for multiple return values. | |
1336 | ||
1337 | \item[no.uio.ifi.refaktor.analyze.collectors] \hfill \\ | |
1338 | This package contains the property collectors. Collectors are used to gather | |
1339 | properties from a text selection. This is mostly properties regarding | |
1340 | referenced names and their occurrences. It is these properties that makes up | |
1341 | the basis for finding the best candidates for a refactoring. | |
1342 | \end{description} | |
1343 | ||
1344 | \subsubsection{no.uio.ifi.refaktor.change} | |
1345 | This package, and its subpackages, contains functionality for manipulate source | |
1346 | code. | |
1347 | ||
1348 | \begin{description} | |
1349 | \item[no.uio.ifi.refaktor.change.changers] \hfill \\ | |
1350 | This package contains source code changers. They are used to glue together | |
1351 | the analysis of source code and the actual execution of the changes. | |
1352 | ||
1353 | \item[no.uio.ifi.refaktor.change.executors] \hfill \\ | |
1354 | The executors that are responsible for making concrete changes are found in | |
1355 | this package. They are mostly used to create and execute one or more Eclipse | |
1356 | refactorings. | |
1357 | ||
1358 | \item[no.uio.ifi.refaktor.change.processors] \hfill \\ | |
1359 | Contains a refactoring processor for the \MoveMethod refactoring. The code | |
1360 | is stolen and modified to fix a bug. The related bug is described in | |
1361 | \myref{eclipse_bug_429416}. | |
1362 | ||
1363 | \end{description} | |
1364 | ||
1365 | \subsubsection{no.uio.ifi.refaktor.handlers} | |
1366 | This package contains handlers for the commands defined in the plugin manifest. | |
1367 | ||
1368 | \subsubsection{no.uio.ifi.refaktor.prefix} | |
1369 | This package contains the \type{Prefix} type that is the data representation of | |
1370 | the prefixes found by the \type{PrefixesCollector}. It also contains the prefix | |
1371 | set for storing and working with prefixes. | |
1372 | ||
1373 | \subsubsection{no.uio.ifi.refaktor.statistics} | |
1374 | The package contains statistics functionality. Its heart is the statistics | |
1375 | aspect that is responsible for gathering statistics during the execution of the | |
1376 | \ExtractAndMoveMethod refactoring. | |
1377 | ||
1378 | \begin{description} | |
1379 | \item[no.uio.ifi.refaktor.statistics.reports] \hfill \\ | |
1380 | This package contains a simple framework for generating reports from the | |
1381 | statistics data generated by the aspect. Currently, the only available | |
1382 | report type is a simple text report. | |
1383 | ||
1384 | \end{description} | |
1385 | ||
1386 | ||
1387 | \subsubsection{no.uio.ifi.refaktor.textselection} | |
1388 | This package contains the two custom text selections that are used extensively | |
1389 | throughout the project. One of them is just a subclass of the other, to support | |
1390 | the use of the memento pattern to optimize the memory usage during benchmarking. | |
1391 | ||
1392 | \subsubsection{no.uio.ifi.refaktor.debugging} | |
1393 | The package contains a debug utility class. I addition to this, the package | |
1394 | \code{no.uio.ifi.refaktor.utils.aspects} contains a couple of aspects used for | |
1395 | debugging purposes. | |
1396 | ||
1397 | \subsubsection{no.uio.ifi.refaktor.utils} | |
1398 | Utility package that contains all the functionality that has to do with parsing | |
1399 | of source code. It also has utility classes for looking up handles to methods | |
1400 | and types et cetera. | |
1401 | ||
1402 | \begin{description} | |
1403 | \item[no.uio.ifi.refaktor.utils.caching] \hfill \\ | |
1404 | This package contains the caching manager for compilation units, along with | |
1405 | classes for different caching strategies. | |
1406 | ||
1407 | \item[no.uio.ifi.refaktor.utils.nullobjects] \hfill \\ | |
1408 | Contains classes for creating different null objects. Most of the classes is | |
1409 | used to represent null objects of different handle types. These null objects | |
1410 | are returned from various utility classes instead of returning a \var{null} | |
1411 | value when other values are not available. | |
1412 | ||
1413 | \end{description} | |
1414 | ||
1415 | ||
a5317dcf | 1416 | |
77c5e0bc EK |
1417 | \section{Continuous integration} |
1418 | The continuous integration server | |
fe0a4c48 | 1419 | \name{Jenkins}\footnote{\url{http://jenkins-ci.org/}} has been set up for the |
77c5e0bc EK |
1420 | project\footnote{A work mostly done by the supervisor.}. It is used as a way to |
1421 | run tests and perform code coverage analysis. | |
1422 | ||
fe0a4c48 | 1423 | To be able to build the \name{Eclipse} plugins and run tests for them with Jenkins, the |
77c5e0bc | 1424 | component assembly project |
fe0a4c48 | 1425 | \name{Buckminster}\footnote{\url{http://www.eclipse.org/buckminster/}} is used, |
77c5e0bc EK |
1426 | through its plugin for Jenkins. Buckminster provides for a way to specify the |
1427 | resources needed for building a project and where and how to find them. | |
1428 | Buckminster also handles the setup of a target environment to run the tests in. | |
fe0a4c48 EK |
1429 | All this is needed because the code to build depends on an \name{Eclipse} |
1430 | installation with various plugins. | |
77c5e0bc EK |
1431 | |
1432 | \subsection{Problems with AspectJ} | |
1433 | The Buckminster build worked fine until introducing AspectJ into the project. | |
1434 | When building projects using AspectJ, there are some additional steps that needs | |
1435 | to be performed. First of all, the aspects themselves must be compiled. Then the | |
1436 | aspects needs to be woven with the classes they affect. This demands a process | |
1437 | that does multiple passes over the source code. | |
1438 | ||
fe0a4c48 EK |
1439 | When using AspectJ with \name{Eclipse}, the specialized compilation and the |
1440 | weaving can be handled by the \name{AspectJ Development | |
77c5e0bc | 1441 | Tools}\footnote{\url{https://www.eclipse.org/ajdt/}}. This works all fine, but |
fe0a4c48 EK |
1442 | it complicates things when trying to build a project depending on \name{Eclipse} |
1443 | plugins outside of \name{Eclipse}. There is supposed to be a way to specify a | |
1444 | compiler adapter for javac, together with the file extensions for the file types | |
1445 | it shall operate. The AspectJ compiler adapter is called | |
77c5e0bc EK |
1446 | \typewithref{org.aspectj.tools.ant.taskdefs}{Ajc11CompilerAdapter}, and it works |
1447 | with files that has the extensions \code{*.java} and \code{*.aj}. I tried to | |
1448 | setup this in the build properties file for the project containing the aspects, | |
1449 | but to no avail. The project containing the aspects does not seem to be built at | |
1450 | all, and the projects that depends on it complains that they cannot find certain | |
1451 | classes. | |
1452 | ||
fe0a4c48 | 1453 | I then managed to write an \name{Ant}\footnote{\url{https://ant.apache.org/}} |
77c5e0bc EK |
1454 | build file that utilizes the AspectJ compiler adapter, for the |
1455 | \code{no.uio.ifi.refaktor} plugin. The problem was then that it could no longer | |
1456 | take advantage of the environment set up by Buckminster. The solution to this | |
1457 | particular problem was of a ``hacky'' nature. It involves exporting the plugin | |
1458 | dependencies for the project to an Ant build file, and copy the exported path | |
1459 | into the existing build script. But then the Ant script needs to know where the | |
fe0a4c48 EK |
1460 | local \name{Eclipse} installation is located. This is no problem when building |
1461 | on a local machine, but to utilize the setup done by Buckminster is a problem | |
1462 | still unsolved. To get the classpath for the build setup correctly, and here | |
1463 | comes the most ``hacky'' part of the solution, the Ant script has a target for | |
1464 | copying the classpath elements into a directory relative to the project | |
1465 | directory and checking it into Git. When no \code{ECLIPSE\_HOME} property is set | |
1466 | while running Ant, the script uses the copied plugins instead of the ones | |
1467 | provided by the \name{Eclipse} installation when building the project. This | |
1468 | obviously creates some problems with maintaining the list of dependencies in the | |
1469 | Ant file, as well as remembering to copy the plugins every time the list of | |
1470 | dependencies change. | |
77c5e0bc EK |
1471 | |
1472 | The Ant script described above is run by Jenkins before the Buckminster setup | |
1473 | and build. When setup like this, the Buckminster build succeeds for the projects | |
1474 | not using AspectJ, and the tests are run as normal. This is all good, but it | |
1475 | feels a little scary, since the reason for Buckminster not working with AspectJ | |
1476 | is still unknown. | |
1477 | ||
1478 | The problems with building with AspectJ on the Jenkins server lasted for a | |
1479 | while, before they were solved. This is reflected in the ``Test Result Trend'' | |
1480 | and ``Code Coverage Trend'' reported by Jenkins. | |
0bd768f3 | 1481 | |
3b7c1d90 | 1482 | |
a5317dcf | 1483 | |
5837a41f EK |
1484 | \chapter{Refactorings in Eclipse JDT: Design, Shortcomings and Wishful |
1485 | Thinking}\label{ch:jdt_refactorings} | |
1486 | ||
1487 | This chapter will deal with some of the design behind refactoring support in | |
fe0a4c48 | 1488 | \name{Eclipse}, and the JDT in specific. After which it will follow a section about |
5837a41f EK |
1489 | shortcomings of the refactoring API in terms of composition of refactorings. The |
1490 | chapter will be concluded with a section telling some of the ways the | |
1491 | implementation of refactorings in the JDT could have worked to facilitate | |
1492 | composition of refactorings. | |
055dca93 | 1493 | |
b0e80574 | 1494 | \section{Design} |
fe0a4c48 | 1495 | The refactoring world of \name{Eclipse} can in general be separated into two parts: The |
b289552b | 1496 | language independent part and the part written for a specific programming |
07e173d4 EK |
1497 | language -- the language that is the target of the supported refactorings. |
1498 | \todo{What about the language specific part?} | |
f041551b EK |
1499 | |
1500 | \subsection{The Language Toolkit} | |
1326eec6 EK |
1501 | The Language Toolkit\footnote{The content of this section is a mixture of |
1502 | written material from | |
1503 | \url{https://www.eclipse.org/articles/Article-LTK/ltk.html} and | |
1504 | \url{http://www.eclipse.org/articles/article.php?file=Article-Unleashing-the-Power-of-Refactoring/index.html}, | |
1505 | the LTK source code and my own memory.}, or LTK for short, is the framework that | |
fe0a4c48 | 1506 | is used to implement refactorings in \name{Eclipse}. It is language independent and |
1326eec6 EK |
1507 | provides the abstractions of a refactoring and the change it generates, in the |
1508 | form of the classes \typewithref{org.eclipse.ltk.core.refactoring}{Refactoring} | |
1509 | and \typewithref{org.eclipse.ltk.core.refactoring}{Change}. | |
1510 | ||
1511 | There are also parts of the LTK that is concerned with user interaction, but | |
1512 | they will not be discussed here, since they are of little value to us and our | |
1513 | use of the framework. We are primarily interested in the parts that can be | |
1514 | automated. | |
f041551b EK |
1515 | |
1516 | \subsubsection{The Refactoring Class} | |
1517 | The abstract class \type{Refactoring} is the core of the LTK framework. Every | |
1518 | refactoring that is going to be supported by the LTK have to end up creating an | |
1519 | instance of one of its subclasses. The main responsibilities of subclasses of | |
1520 | \type{Refactoring} is to implement template methods for condition checking | |
1521 | (\methodwithref{org.eclipse.ltk.core.refactoring.Refactoring}{checkInitialConditions} | |
1522 | and | |
1523 | \methodwithref{org.eclipse.ltk.core.refactoring.Refactoring}{checkFinalConditions}), | |
1524 | in addition to the | |
1525 | \methodwithref{org.eclipse.ltk.core.refactoring.Refactoring}{createChange} | |
07e173d4 EK |
1526 | method that creates and returns an instance of the \type{Change} class. |
1527 | ||
1528 | If the refactoring shall support that others participate in it when it is | |
1529 | executed, the refactoring has to be a processor-based | |
1530 | refactoring\typeref{org.eclipse.ltk.core.refactoring.participants.ProcessorBasedRefactoring}. | |
1531 | It then delegates to its given | |
1532 | \typewithref{org.eclipse.ltk.core.refactoring.participants}{RefactoringProcessor} | |
1326eec6 EK |
1533 | for condition checking and change creation. Participating in a refactoring can |
1534 | be useful in cases where the changes done to programming source code affects | |
1535 | other related resources in the workspace. This can be names or paths in | |
1536 | configuration files, or maybe one would like to perform additional logging of | |
1537 | changes done in the workspace. | |
f041551b EK |
1538 | |
1539 | \subsubsection{The Change Class} | |
07e173d4 EK |
1540 | This class is the base class for objects that is responsible for performing the |
1541 | actual workspace transformations in a refactoring. The main responsibilities for | |
1542 | its subclasses is to implement the | |
1543 | \methodwithref{org.eclipse.ltk.core.refactoring.Change}{perform} and | |
1544 | \methodwithref{org.eclipse.ltk.core.refactoring.Change}{isValid} methods. The | |
1545 | \method{isValid} method verifies that the change object is valid and thus can be | |
1546 | executed by calling its \method{perform} method. The \method{perform} method | |
1547 | performs the desired change and returns an undo change that can be executed to | |
1548 | reverse the effect of the transformation done by its originating change object. | |
1549 | ||
61420ef7 | 1550 | \subsubsection{Executing a Refactoring}\label{executing_refactoring} |
07e173d4 EK |
1551 | The life cycle of a refactoring generally follows two steps after creation: |
1552 | condition checking and change creation. By letting the refactoring object be | |
1553 | handled by a | |
1554 | \typewithref{org.eclipse.ltk.core.refactoring}{CheckConditionsOperation} that | |
1555 | in turn is handled by a | |
1556 | \typewithref{org.eclipse.ltk.core.refactoring}{CreateChangeOperation}, it is | |
1557 | assured that the change creation process is managed in a proper manner. | |
1558 | ||
1559 | The actual execution of a change object has to follow a detailed life cycle. | |
1560 | This life cycle is honored if the \type{CreateChangeOperation} is handled by a | |
1561 | \typewithref{org.eclipse.ltk.core.refactoring}{PerformChangeOperation}. If also | |
1562 | an undo manager\typeref{org.eclipse.ltk.core.refactoring.IUndoManager} is set | |
1563 | for the \type{PerformChangeOperation}, the undo change is added into the undo | |
1564 | history. | |
055dca93 | 1565 | |
b0e80574 | 1566 | \section{Shortcomings} |
80663734 | 1567 | This section is introduced naturally with a conclusion: The JDT refactoring |
5837a41f EK |
1568 | implementation does not facilitate composition of refactorings. |
1569 | \todo{refine}This section will try to explain why, and also identify other | |
1570 | shortcomings of both the usability and the readability of the JDT refactoring | |
1571 | source code. | |
80663734 EK |
1572 | |
1573 | I will begin at the end and work my way toward the composition part of this | |
1574 | section. | |
1575 | ||
5837a41f | 1576 | \subsection{Absence of Generics in Eclipse Source Code} |
80663734 | 1577 | This section is not only concerning the JDT refactoring API, but also large |
fe0a4c48 | 1578 | quantities of the \name{Eclipse} source code. The code shows a striking absence of the |
80663734 | 1579 | Java language feature of generics. It is hard to read a class' interface when |
5837a41f EK |
1580 | methods return objects or takes parameters of raw types such as \type{List} or |
1581 | \type{Map}. This sometimes results in having to read a lot of source code to | |
1582 | understand what is going on, instead of relying on the available interfaces. In | |
1583 | addition, it results in a lot of ugly code, making the use of typecasting more | |
1584 | of a rule than an exception. | |
1585 | ||
1586 | \subsection{Composite Refactorings Will Not Appear as Atomic Actions} | |
1587 | ||
1588 | \subsubsection{Missing Flexibility from JDT Refactorings} | |
1589 | The JDT refactorings are not made with composition of refactorings in mind. When | |
1590 | a JDT refactoring is executed, it assumes that all conditions for it to be | |
1326eec6 | 1591 | applied successfully can be found by reading source files that have been |
5837a41f EK |
1592 | persisted to disk. They can only operate on the actual source material, and not |
1593 | (in-memory) copies thereof. This constitutes a major disadvantage when trying to | |
1326eec6 EK |
1594 | compose refactorings, since if an exception occurs in the middle of a sequence |
1595 | of refactorings, it can leave the project in a state where the composite | |
1596 | refactoring was only partially executed. It makes it hard to discard the changes | |
5837a41f EK |
1597 | done without monitoring and consulting the undo manager, an approach that is not |
1598 | bullet proof. | |
1599 | ||
1600 | \subsubsection{Broken Undo History} | |
1601 | When designing a composed refactoring that is to be performed as a sequence of | |
1602 | refactorings, you would like it to appear as a single change to the workspace. | |
1603 | This implies that you would also like to be able to undo all the changes done by | |
1604 | the refactoring in a single step. This is not the way it appears when a sequence | |
1605 | of JDT refactorings is executed. It leaves the undo history filled up with | |
1606 | individual undo actions corresponding to every single JDT refactoring in the | |
fe0a4c48 | 1607 | sequence. This problem is not trivial to handle in \name{Eclipse} |
e123ab03 | 1608 | \see{hacking_undo_history}. |
5837a41f EK |
1609 | |
1610 | \section{Wishful Thinking} | |
3727b75b | 1611 | \todoin{???} |
80663734 | 1612 | |
b0e80574 EK |
1613 | \chapter{Composite Refactorings in Eclipse} |
1614 | ||
1615 | \section{A Simple Ad Hoc Model} | |
fe0a4c48 | 1616 | As pointed out in \myref{ch:jdt_refactorings}, the \name{Eclipse} JDT refactoring model |
8b6b22c8 EK |
1617 | is not very well suited for making composite refactorings. Therefore a simple |
1618 | model using changer objects (of type \type{RefaktorChanger}) is used as an | |
fe0a4c48 | 1619 | abstraction layer on top of the existing \name{Eclipse} refactorings, instead of |
50954fde EK |
1620 | extending the \typewithref{org.eclipse.ltk.core.refactoring}{Refactoring} class. |
1621 | ||
1622 | The use of an additional abstraction layer is a deliberate choice. It is due to | |
1623 | the problem of creating a composite | |
1624 | \typewithref{org.eclipse.ltk.core.refactoring}{Change} that can handle text | |
1625 | changes that interfere with each other. Thus, a \type{RefaktorChanger} may, or | |
1626 | may not, take advantage of one or more existing refactorings, but it is always | |
1627 | intended to make a change to the workspace. | |
1628 | ||
1629 | \subsection{A typical \type{RefaktorChanger}} | |
1630 | The typical refaktor changer class has two responsibilities, checking | |
1631 | preconditions and executing the requested changes. This is not too different | |
1632 | from the responsibilities of an LTK refactoring, with the distinction that a | |
1633 | refaktor changer also executes the change, while an LTK refactoring is only | |
1634 | responsible for creating the object that can later be used to do the job. | |
1635 | ||
1636 | Checking of preconditions is typically done by an | |
1637 | \typewithref{no.uio.ifi.refaktor.analyze.analyzers}{Analyzer}. If the | |
1638 | preconditions validate, the upcoming changes are executed by an | |
1639 | \typewithref{no.uio.ifi.refaktor.change.executors}{Executor}. | |
b0e80574 EK |
1640 | |
1641 | \section{The Extract and Move Method Refactoring} | |
61420ef7 EK |
1642 | %The Extract and Move Method Refactoring is implemented mainly using these |
1643 | %classes: | |
1644 | %\begin{itemize} | |
1645 | % \item \type{ExtractAndMoveMethodChanger} | |
1646 | % \item \type{ExtractAndMoveMethodPrefixesExtractor} | |
1647 | % \item \type{Prefix} | |
1648 | % \item \type{PrefixSet} | |
1649 | %\end{itemize} | |
1650 | ||
1651 | \subsection{The Building Blocks} | |
1652 | This is a composite refactoring, and hence is built up using several primitive | |
b5c7bb1b EK |
1653 | refactorings. These basic building blocks are, as its name implies, the |
1654 | \ExtractMethod refactoring\citing{refactoring} and the \MoveMethod | |
fe0a4c48 | 1655 | refactoring\citing{refactoring}. In \name{Eclipse}, the implementations of these |
b5c7bb1b | 1656 | refactorings are found in the classes |
61420ef7 EK |
1657 | \typewithref{org.eclipse.jdt.internal.corext.refactoring.code}{ExtractMethodRefactoring} |
1658 | and | |
1659 | \typewithref{org.eclipse.jdt.internal.corext.refactoring.structure}{MoveInstanceMethodProcessor}, | |
1660 | where the last class is designed to be used together with the processor-based | |
1661 | \typewithref{org.eclipse.ltk.core.refactoring.participants}{MoveRefactoring}. | |
1662 | ||
1663 | \subsubsection{The ExtractMethodRefactoring Class} | |
1664 | This class is quite simple in its use. The only parameters it requires for | |
1665 | construction is a compilation | |
1666 | unit\typeref{org.eclipse.jdt.core.ICompilationUnit}, the offset into the source | |
1667 | code where the extraction shall start, and the length of the source to be | |
1668 | extracted. Then you have to set the method name for the new method together with | |
50954fde | 1669 | its visibility and some not so interesting parameters. |
61420ef7 EK |
1670 | |
1671 | \subsubsection{The MoveInstanceMethodProcessor Class} | |
fe0a4c48 EK |
1672 | For the \refa{Move Method}, the processor requires a little more advanced input than |
1673 | the class for the \refa{Extract Method}. For construction it requires a method | |
50954fde EK |
1674 | handle\typeref{org.eclipse.jdt.core.IMethod} for the method that is to be moved. |
1675 | Then the target for the move have to be supplied as the variable binding from a | |
1676 | chosen variable declaration. In addition to this, one have to set some | |
1677 | parameters regarding setters/getters, as well as delegation. | |
61420ef7 | 1678 | |
50954fde EK |
1679 | To make a working refactoring from the processor, one have to create a |
1680 | \type{MoveRefactoring} with it. | |
b0e80574 | 1681 | |
356782a0 | 1682 | \subsection{The ExtractAndMoveMethodChanger} |
50954fde | 1683 | |
61420ef7 | 1684 | The \typewithref{no.uio.ifi.refaktor.changers}{ExtractAndMoveMethodChanger} |
50954fde EK |
1685 | class is a subclass of the class |
1686 | \typewithref{no.uio.ifi.refaktor.changers}{RefaktorChanger}. It is responsible | |
1687 | for analyzing and finding the best target for, and also executing, a composition | |
fe0a4c48 | 1688 | of the \refa{Extract Method} and \refa{Move Method} refactorings. This particular changer is |
50954fde EK |
1689 | the one of my changers that is closest to being a true LTK refactoring. It can |
1690 | be reworked to be one if the problems with overlapping changes are resolved. The | |
1691 | changer requires a text selection and the name of the new method, or else a | |
1692 | method name will be generated. The selection has to be of the type | |
1693 | \typewithref{no.uio.ifi.refaktor.utils}{CompilationUnitTextSelection}. This | |
1694 | class is a custom extension to | |
1695 | \typewithref{org.eclipse.jface.text}{TextSelection}, that in addition to the | |
1696 | basic offset, length and similar methods, also carry an instance of the | |
1697 | underlying compilation unit handle for the selection. | |
1698 | ||
ae138b9d EK |
1699 | \subsubsection{The |
1700 | \type{ExtractAndMoveMethodAnalyzer}}\label{extractAndMoveMethodAnalyzer} | |
50954fde EK |
1701 | The analysis and precondition checking is done by the |
1702 | \typewithref{no.uio.ifi.refaktor.analyze.analyzers}{ExtractAnd\-MoveMethodAnalyzer}. | |
1703 | First is check whether the selection is a valid selection or not, with respect | |
1704 | to statement boundaries and that it actually contains any selections. Then it | |
1705 | checks the legality of both extracting the selection and also moving it to | |
95c0f364 EK |
1706 | another class. This checking of is performed by a range of checkers |
1707 | \see{checkers}. If the selection is approved as legal, it is analyzed to find | |
1708 | the presumably best target to move the extracted method to. | |
50954fde EK |
1709 | |
1710 | For finding the best suitable target the analyzer is using a | |
1711 | \typewithref{no.uio.ifi.refaktor.analyze.collectors}{PrefixesCollector} that | |
ae138b9d EK |
1712 | collects all the possible candidate targets for the refactoring. All the |
1713 | non-candidates is found by an | |
50954fde | 1714 | \typewithref{no.uio.ifi.refaktor.analyze.collectors}{UnfixesCollector} that |
b8fce5af | 1715 | collects all the targets that will give some kind of error if used. (For |
3ab3e132 | 1716 | details about the property collectors, see \myref{propertyCollectors}.) All |
b8fce5af | 1717 | prefixes (and unfixes) are represented by a |
a6415293 EK |
1718 | \typewithref{no.uio.ifi.refaktor.extractors}{Prefix}, and they are collected |
1719 | into sets of prefixes. The safe prefixes is found by subtracting from the set of | |
b8fce5af | 1720 | candidate prefixes the prefixes that is enclosing any of the unfixes. A prefix |
a6415293 EK |
1721 | is enclosing an unfix if the unfix is in the set of its sub-prefixes. As an |
1722 | example, \texttt{``a.b''} is enclosing \texttt{``a''}, as is \texttt{``a''}. The | |
1723 | safe prefixes is unified in a \type{PrefixSet}. If a prefix has only one | |
1724 | occurrence, and is a simple expression, it is considered unsuitable as a move | |
1725 | target. This occurs in statements such as \texttt{``a.foo()''}. For such | |
1726 | statements it bares no meaning to extract and move them. It only generates an | |
1727 | extra method and the calling of it. | |
50954fde | 1728 | |
fbaa89ce EK |
1729 | The most suitable target for the refactoring is found by finding the prefix with |
1730 | the most occurrences. If two prefixes have the same occurrence count, but they | |
1731 | differ in length, the longest of them is chosen. | |
1732 | ||
0f6e45f8 | 1733 | \todoin{Clean up sections/subsections.} |
50954fde | 1734 | |
c8088eec EK |
1735 | \subsubsection{The |
1736 | \type{ExtractAndMoveMethodExecutor}}\label{extractAndMoveMethodExecutor} | |
50954fde EK |
1737 | If the analysis finds a possible target for the composite refactoring, it is |
1738 | executed by an | |
1739 | \typewithref{no.uio.ifi.refaktor.change.executors}{ExtractAndMoveMethodExecutor}. | |
1740 | It is composed of the two executors known as | |
1741 | \typewithref{no.uio.ifi.refaktor.change.executors}{ExtractMethodRefactoringExecutor} | |
1742 | and | |
1743 | \typewithref{no.uio.ifi.refaktor.change.executors}{MoveMethodRefactoringExecutor}. | |
1744 | The \type{ExtractAndMoveMethodExecutor} is responsible for gluing the two | |
3727b75b | 1745 | together by feeding the \type{MoveMethod\-RefactoringExecutor} with the |
222d172b EK |
1746 | resources needed after executing the extract method refactoring. |
1747 | %\see{postExtractExecution}. | |
50954fde EK |
1748 | |
1749 | \subsubsection{The \type{ExtractMethodRefactoringExecutor}} | |
1750 | This executor is responsible for creating and executing an instance of the | |
1751 | \type{ExtractMethodRefactoring} class. It is also responsible for collecting | |
1752 | some post execution resources that can be used to find the method handle for the | |
1753 | extracted method, as well as information about its parameters, including the | |
1754 | variable they originated from. | |
1755 | ||
1756 | \subsubsection{The \type{MoveMethodRefactoringExecutor}} | |
1757 | This executor is responsible for creating and executing an instance of the | |
1758 | \type{MoveRefactoring}. The move refactoring is a processor-based refactoring, | |
fe0a4c48 | 1759 | and for the \refa{Move Method} refactoring it is the \type{MoveInstanceMethodProcessor} |
50954fde EK |
1760 | that is used. |
1761 | ||
1762 | The handle for the method to be moved is found on the basis of the information | |
fe0a4c48 | 1763 | gathered after the execution of the \refa{Extract Method} refactoring. The only |
50954fde EK |
1764 | information the \type{ExtractMethodRefactoring} is sharing after its execution, |
1765 | regarding find the method handle, is the textual representation of the new | |
1766 | method signature. Therefore it must be parsed, the strings for types of the | |
1767 | parameters must be found and translated to a form that can be used to look up | |
1768 | the method handle from its type handle. They have to be on the unresolved | |
1769 | form.\todo{Elaborate?} The name for the type is found from the original | |
1770 | selection, since an extracted method must end up in the same type as the | |
1771 | originating method. | |
1772 | ||
fe0a4c48 | 1773 | When analyzing a selection prior to performing the \refa{Extract Method} refactoring, a |
50954fde EK |
1774 | target is chosen. It has to be a variable binding, so it is either a field or a |
1775 | local variable/parameter. If the target is a field, it can be used with the | |
1776 | \type{MoveInstanceMethodProcessor} as it is, since the extracted method still is | |
1777 | in its scope. But if the target is local to the originating method, the target | |
1778 | that is to be used for the processor must be among its parameters. Thus the | |
1779 | target must be found among the extracted method's parameters. This is done by | |
1780 | finding the parameter information object that corresponds to the parameter that | |
1781 | was declared on basis of the original target's variable when the method was | |
1782 | extracted. (The extracted method must take one such parameter for each local | |
1783 | variable that is declared outside the selection that is extracted.) To match the | |
1784 | original target with the correct parameter information object, the key for the | |
1785 | information object is compared to the key from the original target's binding. | |
1786 | The source code must then be parsed to find the method declaration for the | |
1787 | extracted method. The new target must be found by searching through the | |
1788 | parameters of the declaration and choose the one that has the same type as the | |
1789 | old binding from the parameter information object, as well as the same name that | |
1790 | is provided by the parameter information object. | |
1791 | ||
1792 | ||
356782a0 EK |
1793 | \subsection{The |
1794 | SearchBasedExtractAndMoveMethodChanger}\label{searchBasedExtractAndMoveMethodChanger} | |
c8088eec EK |
1795 | The |
1796 | \typewithref{no.uio.ifi.refaktor.change.changers}{SearchBasedExtractAndMoveMethodChanger} | |
1797 | is a changer whose purpose is to automatically analyze a method, and execute the | |
1798 | \ExtractAndMoveMethod refactoring on it if it is a suitable candidate for the | |
1799 | refactoring. | |
1800 | ||
1801 | First, the \typewithref{no.uio.ifi.refaktor.analyze.analyzers}{SearchBasedExtractAndMoveMethodAnalyzer} is used | |
1802 | to analyze the method. If the method is found to be a candidate, the result from | |
1803 | the analysis is fed to the \type{ExtractAndMoveMethodExecutor}, whose job is to | |
1804 | execute the refactoring \see{extractAndMoveMethodExecutor}. | |
1805 | ||
1806 | \subsubsection{The SearchBasedExtractAndMoveMethodAnalyzer} | |
1807 | This analyzer is responsible for analyzing all the possible text selections of a | |
1808 | method and then choose the best result out of the analysis results that is, by | |
1809 | the analyzer, considered to be the potential candidates for the Extract and Move | |
1810 | Method refactoring. | |
1811 | ||
1812 | Before the analyzer is able to work with the text selections of a method, it | |
1813 | needs to generate them. To do this, it parses the method to obtain a | |
1814 | \type{MethodDeclaration} for it \see{astEclipse}. Then there is a statement | |
1815 | lists creator that creates statements lists of the different groups of | |
1816 | statements in the body of the method declaration. A text selections generator | |
1817 | generates text selections of all the statement lists for the analyzer to work | |
1818 | with. | |
1819 | ||
1820 | \paragraph{The statement lists creator} | |
1821 | is responsible for generating lists of statements for all the possible levels of | |
1822 | statements in the method. The statement lists creator is implemented as an AST | |
1823 | visitor \see{astVisitor}. It generates lists of statements by visiting all the | |
1824 | blocks in the method declaration and stores their statements in a collection of | |
1825 | statement lists. In addition, it visits all of the other statements that can | |
1826 | have a statement as a child, such as the different control structures and the | |
1827 | labeled statement. | |
1828 | ||
1829 | The switch statement is the only kind of statement that is not straight forward | |
1830 | to obtain the child statements from. It stores all of its children in a flat | |
1831 | list. Its switch case statements are included in this list. This means that | |
1832 | there are potential statement lists between all of these case statements. The | |
1833 | list of statements from a switch statement is therefore traversed, and the | |
1834 | statements between the case statements are grouped as separate lists. | |
1835 | ||
1836 | There is an example of how the statement lists creator would generate lists for | |
1837 | a simple method in \myref{lst:statementListsExample}. | |
1838 | ||
1839 | \begin{listing}[h] | |
1840 | \def\charwidth{5.7pt} | |
1841 | \def\indent{4*\charwidth} | |
1842 | \def\lineheight{\baselineskip} | |
1843 | \def\mintedtop{\lineheight} | |
1844 | ||
1845 | \begin{tikzpicture}[overlay, yscale=-1] | |
1846 | \tikzstyle{overlaybox}=[fill=lightgray,opacity=0.2] | |
1847 | \draw[overlaybox] (0,\mintedtop+\lineheight) rectangle | |
1848 | +(22*\charwidth,10*\lineheight); | |
1849 | \draw[overlaybox] (\indent,\mintedtop+2*\lineheight) rectangle | |
1850 | +(13*\charwidth,\lineheight); | |
1851 | \draw[overlaybox] (2*\indent,\mintedtop+6*\lineheight) rectangle | |
1852 | +(13*\charwidth,2*\lineheight); | |
1853 | \draw[overlaybox] (2*\indent,\mintedtop+9*\lineheight) rectangle | |
1854 | +(13*\charwidth,\lineheight); | |
1855 | \end{tikzpicture} | |
1856 | \begin{minted}{java} | |
1857 | void method() { | |
1858 | if (bool) | |
1859 | b.bar(); | |
1860 | ||
1861 | switch (val) { | |
1862 | case 1: | |
1863 | b.foo(); | |
1864 | c.foo(); | |
1865 | default: | |
1866 | c.foo(); | |
1867 | } | |
1868 | } | |
1869 | \end{minted} | |
1870 | \caption{Example of how the statement lists creator would group a simple method | |
1871 | into lists of statements. Each highlighted rectangle represents a list.} | |
1872 | \label{lst:statementListsExample} | |
1873 | \end{listing} | |
1874 | ||
1875 | \paragraph{The text selections generator} generates text selections for each | |
1876 | list of statements from the statement lists creator. Conceptually, the generator | |
ae138b9d EK |
1877 | generates a text selection for every possible ordered \todo{make clearer} |
1878 | combination of statements in a list. For a list of statements, the boundary | |
1879 | statements span out a text selection. This means that there are many different | |
1880 | lists that could span out the same selection. | |
c8088eec EK |
1881 | |
1882 | In practice, the text selections are calculated by only one traversal of the | |
1883 | statement list. There is a set of generated text selections. For each statement, | |
1884 | there is created a temporary set of selections, in addition to a text selection | |
1885 | based on the offset and length of the statement. This text selection is added to | |
1886 | the temporary set. Then the new selection is added with every selection from the | |
1887 | set of generated text selections. These new selections are added to the | |
1888 | temporary set. Then the temporary set of selections is added to the set of | |
1889 | generated text selections. The result of adding two text selections is a new | |
1890 | text selection spanned out by the two addends. | |
1891 | ||
0fa64de5 EK |
1892 | \begin{listing}[h] |
1893 | \def\charwidth{5.7pt} | |
1894 | \def\indent{4*\charwidth} | |
1895 | \def\lineheight{\baselineskip} | |
1896 | \def\mintedtop{\lineheight} | |
1897 | ||
1898 | \begin{tikzpicture}[overlay, yscale=-1] | |
1899 | \tikzstyle{overlaybox}=[fill=lightgray,opacity=0.2] | |
1900 | ||
1901 | \draw[overlaybox] (2*\charwidth,\mintedtop) rectangle | |
1902 | +(18*\charwidth,\lineheight); | |
1903 | ||
1904 | \draw[overlaybox] (2*\charwidth,\mintedtop+\lineheight) rectangle | |
1905 | +(18*\charwidth,\lineheight); | |
1906 | ||
1907 | \draw[overlaybox] (2*\charwidth,\mintedtop+3*\lineheight) rectangle | |
1908 | +(18*\charwidth,\lineheight); | |
1909 | ||
1910 | \draw[overlaybox] (\indent-3*\charwidth,\mintedtop) rectangle | |
1911 | +(20*\charwidth,2*\lineheight); | |
1912 | ||
1913 | \draw[overlaybox] (3*\charwidth,\mintedtop+\lineheight) rectangle | |
1914 | +(16*\charwidth,3*\lineheight); | |
1915 | ||
1916 | \draw[overlaybox] (\indent,\mintedtop) rectangle | |
1917 | +(14*\charwidth,4*\lineheight); | |
1918 | \end{tikzpicture} | |
1919 | \begin{minted}{java} | |
1920 | statement one; | |
1921 | statement two; | |
1922 | ... | |
1923 | statement k; | |
1924 | \end{minted} | |
1925 | \caption{Example of how the text selections generator would generate text | |
1926 | selections based on a lists of statements. Each highlighted rectangle | |
1927 | represents a text selection.} | |
1928 | \label{lst:textSelectionsExample} | |
1929 | \end{listing} | |
20bcc7bf | 1930 | \todoin{fix \myref{lst:textSelectionsExample}?} |
0fa64de5 | 1931 | |
ae138b9d EK |
1932 | \paragraph{Finding the candidate} for the refactoring is done by analyzing all |
1933 | the generated text selection with the \type{ExtractAndMoveMethodAnalyzer} | |
1934 | \see{extractAndMoveMethodAnalyzer}. If the analyzer generates a useful result, | |
1935 | an \type{ExtractAndMoveMethodCandidate} is created from it, that is kept in a | |
1936 | list of potential candidates. If no candidates are found, the | |
1937 | \type{NoTargetFoundException} is thrown. | |
1938 | ||
1939 | Since only one of the candidates can be chosen, the analyzer must sort out which | |
1940 | candidate to choose. The sorting is done by the static \method{sort} method of | |
1941 | \type{Collections}. The comparison in this sorting is done by an | |
1942 | \type{ExtractAndMoveMethodCandidateComparator}. | |
1943 | \todoin{Write about the | |
1944 | ExtractAndMoveMethodCandidateComparator/FavorNoUnfixesCandidateComparator} | |
1945 | ||
1946 | \paragraph{The complexity} of how many text selections that needs to be analyzed | |
1947 | for a total of $n$ statements is bounded by $O(n^2)$. | |
1948 | ||
3471cd15 | 1949 | \begin{theorem} |
ae138b9d EK |
1950 | The number of text selections that need to be analyzed for each list of |
1951 | statements of length $n$, is exactly | |
1952 | ||
3471cd15 | 1953 | \begin{equation*} |
ae138b9d EK |
1954 | \sum_{i=1}^{n} i = \frac{n(n+1)}{2} |
1955 | \label{eq:complexityStatementList} | |
3471cd15 EK |
1956 | \end{equation*} |
1957 | \label{thm:numberOfTextSelection} | |
1958 | \end{theorem} | |
ae138b9d EK |
1959 | |
1960 | \begin{proof} | |
1961 | For $n=1$ this is trivial: $\frac{1(1+1)}{2} = \frac{2}{2} = 1$. One statement | |
1962 | equals one selection. | |
1963 | ||
1964 | For $n=2$, you get one text selection for the first statement. For the second, | |
1965 | you get one selection for the statement itself, and one selection for the two | |
1966 | of them combined. This equals three selections. $\frac{2(2+1)}{2} = | |
1967 | \frac{6}{2} = 3$. | |
1968 | ||
1969 | For $n=3$, you get 3 selections for the two first statements, as in the case | |
1970 | where $n=2$. In addition you get one selection for the third statement itself, | |
1971 | and two more statements for the combinations of it with the two previous | |
1972 | statements. This equals six selections. $\frac{3(3+1)}{2} = \frac{12}{2} = 6$. | |
1973 | ||
1974 | Assume that for $n=k$ there exists $\frac{k(k+1)}{2}$ text selections. Then we | |
1975 | want to add selections for another statement, following the previous $k$ | |
1976 | statements. So, for $n=k+1$, we get one additional selection for the statement | |
1977 | itself. Then we get one selection for each pair of the new selection and the | |
1978 | previous $k$ statements. So the total number of selections will be the number | |
1979 | of already generated selections, plus $k$ for every pair, plus one for the | |
1980 | statement itself: $\frac{k(k+1)}{2} + k + | |
1981 | 1 = \frac{k(k+1)+2k+2}{2} = \frac{k(k+1)+2(k+1)}{2} = \frac{(k+1)(k+2)}{2} = | |
1982 | \frac{(k+1)((k+1)+1)}{2} = \sum_{i=1}^{k+1} i$ | |
1983 | \end{proof} | |
1984 | ||
3471cd15 EK |
1985 | \begin{theorem} |
1986 | The number of text selections for a body of statements is maximized if all the | |
1987 | statements are at the same level. | |
1988 | \label{thm:textSelectionsMaximized} | |
1989 | \end{theorem} | |
1990 | ||
1991 | \begin{proof} | |
3ab3e132 | 1992 | Assume we have a body of, in total, $k$ statements. Let |
3471cd15 EK |
1993 | $l,\cdots,m,(k-l-\cdots-m)$ be the lengths of the lists of statements in the |
1994 | body, with $l+\cdots+m<k \Rightarrow l,\cdots,m<k$. | |
1995 | ||
1996 | Then, the number of text selections that are generated for the $k$ statements | |
1997 | is | |
1998 | ||
1999 | { | |
2000 | \small | |
2001 | \begin{align*} | |
2002 | \frac{(k-l-\cdots-m)((k-l-\cdots-m)+ 1)}{2} + \frac{l(l+1)}{2} + \cdots + | |
2003 | \frac{m(m+1)}{2} = \\ | |
2004 | \frac{k^2 - 2kl - \cdots - 2km + l^2 + \cdots + m^2 + k - l - \cdots - m}{2} | |
2005 | + \frac{l^2+l}{2} + \cdots + \frac{m^2+m}{2} = \\ | |
2006 | \frac{k^2 + k + 2l^2 - 2kl + \cdots + 2m^2 - 2km}{2} | |
2007 | \end{align*} | |
2008 | } | |
2009 | ||
2010 | It then remains to show that this inequality holds: | |
2011 | ||
2012 | \begin{align*} | |
2013 | \frac{k^2 + k + 2l^2 - 2kl + \cdots + 2m^2 - 2km}{2} < \frac{k(k+1)}{2} = | |
2014 | \frac{k^2 + k}{2} | |
2015 | \end{align*} | |
2016 | ||
2017 | By multiplication by $2$ on both sides, and by removing the equal parts, we get | |
2018 | ||
2019 | \begin{align*} | |
2020 | 2l^2 - 2kl + \cdots + 2m^2 - 2km < 0 | |
2021 | \end{align*} | |
2022 | ||
2023 | Since $l,\cdots,m<k$, we have that $\forall i \in \{l,\cdots,m\} : 2ki > 2i^2$, | |
2024 | so all the pairs of parts on the form $2i^2-2ki$ are negative. In sum, the | |
2025 | inequality holds. | |
2026 | ||
2027 | \end{proof} | |
ae138b9d EK |
2028 | |
2029 | Therefore, the complexity for the number of selections that needs to be analyzed | |
3471cd15 | 2030 | for a body of $n$ statements is $O\bigl(\frac{n(n+1)}{2}\bigr) = O(n^2)$. |
c8088eec | 2031 | |
356782a0 | 2032 | |
222d172b | 2033 | \begin{comment} |
50954fde | 2034 | \subsection{Finding the IMethod}\label{postExtractExecution} |
356782a0 | 2035 | \todoin{Rename section. Write??} |
222d172b | 2036 | \end{comment} |
61420ef7 | 2037 | |
61420ef7 | 2038 | |
b0e80574 | 2039 | \subsection{The Prefix Class} |
a6415293 EK |
2040 | This class exists mainly for holding data about a prefix, such as the expression |
2041 | that the prefix represents and the occurrence count of the prefix within a | |
2042 | selection. In addition to this, it has some functionality such as calculating | |
2043 | its sub-prefixes and intersecting it with another prefix. The definition of the | |
2044 | intersection between two prefixes is a prefix representing the longest common | |
2045 | expression between the two. | |
2046 | ||
b0e80574 | 2047 | \subsection{The PrefixSet Class} |
a6415293 EK |
2048 | A prefix set holds elements of type \type{Prefix}. It is implemented with the |
2049 | help of a \typewithref{java.util}{HashMap} and contains some typical set | |
2050 | operations, but it does not implement the \typewithref{java.util}{Set} | |
2051 | interface, since the prefix set does not need all of the functionality a | |
2052 | \type{Set} requires to be implemented. In addition It needs some other | |
2053 | functionality not found in the \type{Set} interface. So due to the relatively | |
2054 | limited use of prefix sets, and that it almost always needs to be referenced as | |
2055 | such, and not a \type{Set<Prefix>}, it remains as an ad hoc solution to a | |
2056 | concrete problem. | |
2057 | ||
2058 | There are two ways adding prefixes to a \type{PrefixSet}. The first is through | |
2059 | its \method{add} method. This works like one would expect from a set. It adds | |
2060 | the prefix to the set if it does not already contain the prefix. The other way | |
2061 | is to \emph{register} the prefix with the set. When registering a prefix, if the | |
2062 | set does not contain the prefix, it is just added. If the set contains the | |
2063 | prefix, its count gets incremented. This is how the occurrence count is handled. | |
2064 | ||
2065 | The prefix set also computes the set of prefixes that is not enclosing any | |
2066 | prefixes of another set. This is kind of a set difference operation only for | |
2067 | enclosing prefixes. | |
b0e80574 | 2068 | |
5837a41f EK |
2069 | \subsection{Hacking the Refactoring Undo |
2070 | History}\label{hacking_undo_history} | |
a6415293 | 2071 | \todoin{Where to put this section?} |
8fae7b44 EK |
2072 | |
2073 | As an attempt to make multiple subsequent changes to the workspace appear as a | |
2074 | single action (i.e. make the undo changes appear as such), I tried to alter | |
2075 | the undo changes\typeref{org.eclipse.ltk.core.refactoring.Change} in the history | |
2076 | of the refactorings. | |
2077 | ||
2078 | My first impulse was to remove the, in this case, last two undo changes from the | |
f041551b | 2079 | undo manager\typeref{org.eclipse.ltk.core.refactoring.IUndoManager} for the |
fe0a4c48 | 2080 | \name{Eclipse} refactorings, and then add them to a composite |
8fae7b44 EK |
2081 | change\typeref{org.eclipse.ltk.core.refactoring.CompositeChange} that could be |
2082 | added back to the manager. The interface of the undo manager does not offer a | |
2083 | way to remove/pop the last added undo change, so a possible solution could be to | |
4cb06723 EK |
2084 | decorate\citing{designPatterns} the undo manager, to intercept and collect the |
2085 | undo changes before delegating to the \method{addUndo} | |
f041551b | 2086 | method\methodref{org.eclipse.ltk.core.refactoring.IUndoManager}{addUndo} of the |
8fae7b44 EK |
2087 | manager. Instead of giving it the intended undo change, a null change could be |
2088 | given to prevent it from making any changes if run. Then one could let the | |
2089 | collected undo changes form a composite change to be added to the manager. | |
2090 | ||
2091 | There is a technical challenge with this approach, and it relates to the undo | |
2092 | manager, and the concrete implementation | |
2093 | UndoManager2\typeref{org.eclipse.ltk.internal.core.refactoring.UndoManager2}. | |
2094 | This implementation is designed in a way that it is not possible to just add an | |
2095 | undo change, you have to do it in the context of an active | |
2096 | operation\typeref{org.eclipse.core.commands.operations.TriggeredOperations}. | |
2097 | One could imagine that it might be possible to trick the undo manager into | |
2098 | believing that you are doing a real change, by executing a refactoring that is | |
2099 | returning a kind of null change that is returning our composite change of undo | |
2100 | refactorings when it is performed. | |
2101 | ||
2102 | Apart from the technical problems with this solution, there is a functional | |
2103 | problem: If it all had worked out as planned, this would leave the undo history | |
2104 | in a dirty state, with multiple empty undo operations corresponding to each of | |
2105 | the sequentially executed refactoring operations, followed by a composite undo | |
2106 | change corresponding to an empty change of the workspace for rounding of our | |
2107 | composite refactoring. The solution to this particular problem could be to | |
2108 | intercept the registration of the intermediate changes in the undo manager, and | |
2109 | only register the last empty change. | |
2110 | ||
2111 | Unfortunately, not everything works as desired with this solution. The grouping | |
2112 | of the undo changes into the composite change does not make the undo operation | |
2113 | appear as an atomic operation. The undo operation is still split up into | |
2114 | separate undo actions, corresponding to the change done by its originating | |
2115 | refactoring. And in addition, the undo actions has to be performed separate in | |
2116 | all the editors involved. This makes it no solution at all, but a step toward | |
2117 | something worse. | |
2118 | ||
2119 | There might be a solution to this problem, but it remains to be found. The | |
2120 | design of the refactoring undo management is partly to be blamed for this, as it | |
2121 | it is to complex to be easily manipulated. | |
2122 | ||
b0e80574 | 2123 | |
0d7fbd88 | 2124 | |
2f2080fb | 2125 | |
03674629 | 2126 | \chapter{Analyzing Source Code in Eclipse} |
5308274d | 2127 | |
356782a0 | 2128 | \section{The Java model}\label{javaModel} |
fe0a4c48 | 2129 | The Java model of \name{Eclipse} is its internal representation of a Java project. It |
5308274d | 2130 | is light-weight, and has only limited possibilities for manipulating source |
fe0a4c48 | 2131 | code. It is typically used as a basis for the Package Explorer in \name{Eclipse}. |
5308274d EK |
2132 | |
2133 | The elements of the Java model is only handles to the underlying elements. This | |
2134 | means that the underlying element of a handle does not need to actually exist. | |
2135 | Hence the user of a handle must always check that it exist by calling the | |
2136 | \method{exists} method of the handle. | |
2137 | ||
356782a0 | 2138 | The handles with descriptions is listed in \myref{tab:javaModel}. |
4e468834 EK |
2139 | |
2140 | \begin{table}[h] | |
2141 | \centering | |
2142 | ||
2143 | \newcolumntype{L}[1]{>{\hsize=#1\hsize\raggedright\arraybackslash}X}% | |
2144 | % sum must equal number of columns (3) | |
2145 | \begin{tabularx}{\textwidth}{| L{0.7} | L{1.1} | L{1.2} |} | |
2146 | \hline | |
2147 | \textbf{Project Element} & \textbf{Java Model element} & | |
2148 | \textbf{Description} \\ | |
2149 | \hline | |
2150 | Java project & \type{IJavaProject} & The Java project which contains all other objects. \\ | |
2151 | \hline | |
2152 | Source folder /\linebreak[2] binary folder /\linebreak[3] external library & | |
2153 | \type{IPackageFragmentRoot} & Hold source or binary files, can be a folder | |
2154 | or a library (zip / jar file). \\ | |
2155 | \hline | |
2156 | Each package & \type{IPackageFragment} & Each package is below the | |
2157 | \type{IPackageFragmentRoot}, sub-packages are not leaves of the package, | |
2158 | they are listed directed under \type{IPackageFragmentRoot}. \\ | |
2159 | \hline | |
2160 | Java Source file & \type{ICompilationUnit} & The Source file is always below | |
2161 | the package node. \\ | |
2162 | \hline | |
2163 | Types /\linebreak[2] Fields /\linebreak[3] Methods & \type{IType} / | |
2164 | \linebreak[0] | |
2165 | \type{IField} /\linebreak[3] \type{IMethod} & Types, fields and methods. \\ | |
2166 | \hline | |
2167 | \end{tabularx} | |
051cf433 EK |
2168 | \caption{The elements of the Java Model. {\footnotesize Taken from |
2169 | \url{http://www.vogella.com/tutorials/EclipseJDT/article.html}}} | |
356782a0 | 2170 | \label{tab:javaModel} |
4e468834 EK |
2171 | \end{table} |
2172 | ||
2173 | The hierarchy of the Java Model is shown in \myref{fig:javaModel}. | |
2174 | ||
5308274d EK |
2175 | \begin{figure}[h] |
2176 | \centering | |
2177 | \begin{tikzpicture}[% | |
2178 | grow via three points={one child at (0,-0.7) and | |
2179 | two children at (0,-0.7) and (0,-1.4)}, | |
2180 | edge from parent path={(\tikzparentnode.south west)+(0.5,0) |- | |
2181 | (\tikzchildnode.west)}] | |
2182 | \tikzstyle{every node}=[draw=black,thick,anchor=west] | |
2183 | \tikzstyle{selected}=[draw=red,fill=red!30] | |
2184 | \tikzstyle{optional}=[dashed,fill=gray!50] | |
2185 | \node {\type{IJavaProject}} | |
2186 | child { node {\type{IPackageFragmentRoot}} | |
2187 | child { node {\type{IPackageFragment}} | |
2188 | child { node {\type{ICompilationUnit}} | |
2189 | child { node {\type{IType}} | |
2190 | child { node {\type{\{ IType \}*}} | |
2191 | child { node {\type{\ldots}}} | |
2192 | } | |
2193 | child [missing] {} | |
2194 | child { node {\type{\{ IField \}*}}} | |
2195 | child { node {\type{IMethod}} | |
2196 | child { node {\type{\{ IType \}*}} | |
2197 | child { node {\type{\ldots}}} | |
2198 | } | |
2199 | } | |
2200 | child [missing] {} | |
2201 | child [missing] {} | |
2202 | child { node {\type{\{ IMethod \}*}}} | |
2203 | } | |
2204 | child [missing] {} | |
2205 | child [missing] {} | |
2206 | child [missing] {} | |
2207 | child [missing] {} | |
2208 | child [missing] {} | |
2209 | child [missing] {} | |
2210 | child [missing] {} | |
2211 | child { node {\type{\{ IType \}*}}} | |
2212 | } | |
2213 | child [missing] {} | |
2214 | child [missing] {} | |
2215 | child [missing] {} | |
2216 | child [missing] {} | |
2217 | child [missing] {} | |
2218 | child [missing] {} | |
2219 | child [missing] {} | |
2220 | child [missing] {} | |
2221 | child [missing] {} | |
2222 | child { node {\type{\{ ICompilationUnit \}*}}} | |
2223 | } | |
2224 | child [missing] {} | |
2225 | child [missing] {} | |
2226 | child [missing] {} | |
2227 | child [missing] {} | |
2228 | child [missing] {} | |
2229 | child [missing] {} | |
2230 | child [missing] {} | |
2231 | child [missing] {} | |
2232 | child [missing] {} | |
2233 | child [missing] {} | |
2234 | child [missing] {} | |
2235 | child { node {\type{\{ IPackageFragment \}*}}} | |
2236 | } | |
2237 | child [missing] {} | |
2238 | child [missing] {} | |
2239 | child [missing] {} | |
2240 | child [missing] {} | |
2241 | child [missing] {} | |
2242 | child [missing] {} | |
2243 | child [missing] {} | |
2244 | child [missing] {} | |
2245 | child [missing] {} | |
2246 | child [missing] {} | |
2247 | child [missing] {} | |
2248 | child [missing] {} | |
2249 | child [missing] {} | |
2250 | child { node {\type{\{ IPackageFragmentRoot \}*}}} | |
2251 | ; | |
2252 | \end{tikzpicture} | |
fe0a4c48 | 2253 | \caption{The Java model of \name{Eclipse}. ``\type{\{ SomeElement \}*}'' means |
5308274d EK |
2254 | \type{SomeElement} zero or more times. For recursive structures, |
2255 | ``\type{\ldots}'' is used.} | |
2256 | \label{fig:javaModel} | |
2257 | \end{figure} | |
2258 | ||
3ab3e132 | 2259 | \section{The Abstract Syntax Tree} |
fe0a4c48 | 2260 | \name{Eclipse} is following the common paradigm of using an abstract syntax tree for |
03674629 EK |
2261 | source code analysis and manipulation. |
2262 | ||
03674629 EK |
2263 | When parsing program source code into something that can be used as a foundation |
2264 | for analysis, the start of the process follows the same steps as in a compiler. | |
3ab3e132 | 2265 | This is all natural, because the way a compiler analyzes code is no different |
03674629 EK |
2266 | from how source manipulation programs would do it, except for some properties of |
2267 | code that is analyzed in the parser, and that they may be differing in what | |
4e468834 | 2268 | kinds of properties they analyze. Thus the process of translation source code |
03674629 | 2269 | into a structure that is suitable for analyzing, can be seen as a kind of |
65e213db EK |
2270 | interrupted compilation process \see{fig:interruptedCompilationProcess}. |
2271 | ||
2272 | \begin{figure}[h] | |
2273 | \centering | |
2274 | \tikzset{ | |
c876d1a4 | 2275 | base/.style={anchor=north, align=center, rectangle, minimum height=1.4cm}, |
65e213db | 2276 | basewithshadow/.style={base, drop shadow, fill=white}, |
c876d1a4 EK |
2277 | outlined/.style={basewithshadow, draw, rounded corners, minimum |
2278 | width=0.4cm}, | |
2279 | primary/.style={outlined, font=\bfseries}, | |
65e213db | 2280 | dashedbox/.style={outlined, dashed}, |
62563950 EK |
2281 | arrowpath/.style={black, align=center, font=\small}, |
2282 | processarrow/.style={arrowpath, ->, >=angle 90, shorten >=1pt}, | |
65e213db | 2283 | } |
62563950 | 2284 | \begin{tikzpicture}[node distance=1.3cm and 3cm, scale=1, every |
c876d1a4 | 2285 | node/.style={transform shape}] |
62563950 EK |
2286 | \node[base](AuxNode1){\small source code}; |
2287 | \node[primary, right=of AuxNode1, xshift=-2.5cm](Scanner){Scanner}; | |
c876d1a4 | 2288 | \node[primary, right=of Scanner, xshift=0.5cm](Parser){Parser}; |
72e039dc EK |
2289 | \node[dashedbox, below=of Parser](SemanticAnalyzer){Semantic\\Analyzer}; |
2290 | \node[dashedbox, left=of SemanticAnalyzer](SourceCodeOptimizer){Source | |
2291 | Code\\Optimizer}; | |
2292 | \node[dashedbox, below=of SourceCodeOptimizer | |
c876d1a4 | 2293 | ](CodeGenerator){Code\\Generator}; |
72e039dc EK |
2294 | \node[dashedbox, right=of CodeGenerator](TargetCodeOptimizer){Target |
2295 | Code\\Optimizer}; | |
2296 | \node[base, right=of TargetCodeOptimizer](AuxNode2){}; | |
c876d1a4 | 2297 | |
62563950 EK |
2298 | \draw[processarrow](AuxNode1) -- (Scanner); |
2299 | ||
2300 | \path[arrowpath] (Scanner) -- node [sloped](tokens){tokens}(Parser); | |
2301 | \draw[processarrow](Scanner) -- (tokens) -- (Parser); | |
2302 | ||
2303 | \path[arrowpath] (Parser) -- node (syntax){syntax | |
2304 | tree}(SemanticAnalyzer); | |
2305 | \draw[processarrow](Parser) -- (syntax) -- (SemanticAnalyzer); | |
2306 | ||
2307 | \path[arrowpath] (SemanticAnalyzer) -- node | |
2308 | [sloped](annotated){annotated\\tree}(SourceCodeOptimizer); | |
2309 | \draw[processarrow, dashed](SemanticAnalyzer) -- (annotated) -- | |
2310 | (SourceCodeOptimizer); | |
2311 | ||
2312 | \path[arrowpath] (SourceCodeOptimizer) -- node | |
2313 | (intermediate){intermediate code}(CodeGenerator); | |
2314 | \draw[processarrow, dashed](SourceCodeOptimizer) -- (intermediate) -- | |
2315 | (CodeGenerator); | |
2316 | ||
2317 | \path[arrowpath] (CodeGenerator) -- node [sloped](target1){target | |
c876d1a4 | 2318 | code}(TargetCodeOptimizer); |
62563950 EK |
2319 | \draw[processarrow, dashed](CodeGenerator) -- (target1) -- |
2320 | (TargetCodeOptimizer); | |
2321 | ||
2322 | \path[arrowpath](TargetCodeOptimizer) -- node [sloped](target2){target | |
c876d1a4 | 2323 | code}(AuxNode2); |
62563950 | 2324 | \draw[processarrow, dashed](TargetCodeOptimizer) -- (target2) (AuxNode2); |
65e213db | 2325 | \end{tikzpicture} |
72e039dc | 2326 | \caption{Interrupted compilation process. {\footnotesize (Full compilation |
52da2102 EK |
2327 | process borrowed from \emph{Compiler construction: principles and practice} |
2328 | by Kenneth C. Louden\citing{louden1997}.)}} | |
65e213db EK |
2329 | \label{fig:interruptedCompilationProcess} |
2330 | \end{figure} | |
2331 | ||
03674629 EK |
2332 | The process starts with a \emph{scanner}, or lexer. The job of the scanner is to |
2333 | read the source code and divide it into tokens for the parser. Therefore, it is | |
2334 | also sometimes called a tokenizer. A token is a logical unit, defined in the | |
2335 | language specification, consisting of one or more consecutive characters. In | |
3ab3e132 | 2336 | the Java language the tokens can for instance be the \var{this} keyword, a curly |
03674629 | 2337 | bracket \var{\{} or a \var{nameToken}. It is recognized by the scanner on the |
3ab3e132 | 2338 | basis of something equivalent of a regular expression. This part of the process |
03674629 EK |
2339 | is often implemented with the use of a finite automata. In fact, it is common to |
2340 | specify the tokens in regular expressions, that in turn is translated into a | |
2341 | finite automata lexer. This process can be automated. | |
2342 | ||
3ab3e132 | 2343 | The program component used to translate a stream of tokens into something |
03674629 EK |
2344 | meaningful, is called a parser. A parser is fed tokens from the scanner and |
2345 | performs an analysis of the structure of a program. It verifies that the syntax | |
2346 | is correct according to the grammar rules of a language, that is usually | |
2347 | specified in a context-free grammar, and often in a variant of the | |
fe0a4c48 | 2348 | \name{Backus--Naur |
03674629 EK |
2349 | Form}\footnote{\url{https://en.wikipedia.org/wiki/Backus-Naur\_Form}}. The |
2350 | result coming from the parser is in the form of an \emph{Abstract Syntax Tree}, | |
2351 | AST for short. It is called \emph{abstract}, because the structure does not | |
2352 | contain all of the tokens produced by the scanner. It only contain logical | |
2353 | constructs, and because it forms a tree, all kinds of parentheses and brackets | |
2354 | are implicit in the structure. It is this AST that is used when performing the | |
2355 | semantic analysis of the code. | |
2356 | ||
2357 | As an example we can think of the expression \code{(5 + 7) * 2}. The root of | |
fe0a4c48 | 2358 | this tree would in \name{Eclipse} be an \type{InfixExpression} with the operator |
d11bcf4d EK |
2359 | \var{TIMES}, and a left operand that is also an \type{InfixExpression} with the |
2360 | operator \var{PLUS}. The left operand \type{InfixExpression}, has in turn a left | |
2361 | operand of type \type{NumberLiteral} with the value \var{``5''} and a right | |
2362 | operand \type{NumberLiteral} with the value \var{``7''}. The root will have a | |
2363 | right operand of type \type{NumberLiteral} and value \var{``2''}. The AST for | |
2364 | this expression is illustrated in \myref{fig:astInfixExpression}. | |
2365 | ||
3ab3e132 EK |
2366 | Contrary to the Java Model, an abstract syntax tree is a heavy-weight |
2367 | representation of source code. It contains information about properties like | |
2368 | type bindings for variables and variable bindings for names. | |
4e468834 EK |
2369 | |
2370 | ||
d11bcf4d EK |
2371 | \begin{figure}[h] |
2372 | \centering | |
a1d68d95 | 2373 | \begin{tikzpicture}[scale=0.8] |
894dce0d | 2374 | \tikzset{level distance=40pt} |
a1d68d95 EK |
2375 | \tikzset{sibling distance=5pt} |
2376 | \tikzstyle{thescale}=[scale=0.8] | |
2377 | \tikzset{every tree node/.style={align=center}} | |
d11bcf4d | 2378 | \tikzset{edge from parent/.append style={thick}} |
a1d68d95 EK |
2379 | \tikzstyle{inode}=[rectangle,rounded corners,draw,fill=lightgray,drop |
2380 | shadow,align=center] | |
2381 | \tikzset{every internal node/.style={inode}} | |
894dce0d | 2382 | \tikzset{every leaf node/.style={draw=none,fill=none}} |
d11bcf4d | 2383 | |
894dce0d EK |
2384 | \Tree [.\type{InfixExpression} [.\type{InfixExpression} |
2385 | [.\type{NumberLiteral} \var{``5''} ] [.\type{Operator} \var{PLUS} ] | |
2386 | [.\type{NumberLiteral} \var{``7''} ] ] | |
d11bcf4d EK |
2387 | [.\type{Operator} \var{TIMES} ] |
2388 | [.\type{NumberLiteral} \var{``2''} ] | |
2389 | ] | |
2390 | \end{tikzpicture} | |
894dce0d | 2391 | \caption{The abstract syntax tree for the expression \code{(5 + 7) * 2}.} |
d11bcf4d EK |
2392 | \label{fig:astInfixExpression} |
2393 | \end{figure} | |
03674629 | 2394 | |
c8088eec | 2395 | \subsection{The AST in Eclipse}\label{astEclipse} |
fe0a4c48 | 2396 | In \name{Eclipse}, every node in the AST is a child of the abstract superclass |
03674629 EK |
2397 | \typewithref{org.eclipse.jdt.core.dom}{ASTNode}. Every \type{ASTNode}, among a |
2398 | lot of other things, provides information about its position and length in the | |
2399 | source code, as well as a reference to its parent and to the root of the tree. | |
2400 | ||
2401 | The root of the AST is always of type \type{CompilationUnit}. It is not the same | |
2402 | as an instance of an \type{ICompilationUnit}, which is the compilation unit | |
894dce0d | 2403 | handle of the Java model. The children of a \type{CompilationUnit} is an |
03674629 EK |
2404 | optional \type{PackageDeclaration}, zero or more nodes of type |
2405 | \type{ImportDecaration} and all its top-level type declarations that has node | |
2406 | types \type{AbstractTypeDeclaration}. | |
2407 | ||
2408 | An \type{AbstractType\-Declaration} can be one of the types | |
2409 | \type{AnnotationType\-Declaration}, \type{Enum\-Declaration} or | |
2410 | \type{Type\-Declaration}. The children of an \type{AbstractType\-Declaration} | |
2411 | must be a subtype of a \type{BodyDeclaration}. These subtypes are: | |
2412 | \type{AnnotationTypeMember\-Declaration}, \type{EnumConstant\-Declaration}, | |
2413 | \type{Field\-Declaration}, \type{Initializer} and \type{Method\-Declaration}. | |
2414 | ||
2415 | Of the body declarations, the \type{Method\-Declaration} is the most interesting | |
2416 | one. Its children include lists of modifiers, type parameters, parameters and | |
2417 | exceptions. It has a return type node and a body node. The body, if present, is | |
2418 | of type \type{Block}. A \type{Block} is itself a \type{Statement}, and its | |
2419 | children is a list of \type{Statement} nodes. | |
2420 | ||
2421 | There are too many types of the abstract type \type{Statement} to list up, but | |
2422 | there exists a subtype of \type{Statement} for every statement type of Java, as | |
2423 | one would expect. This also applies to the abstract type \type{Expression}. | |
2424 | However, the expression \type{Name} is a little special, since it is both used | |
2425 | as an operand in compound expressions, as well as for names in type declarations | |
2426 | and such. | |
2427 | ||
fe0a4c48 | 2428 | There is an overview of some of the structure of an \name{Eclipse} AST in |
94deee9e EK |
2429 | \myref{fig:astEclipse}. |
2430 | ||
e8173df5 EK |
2431 | \begin{figure}[h] |
2432 | \centering | |
5e5908eb | 2433 | \begin{tikzpicture}[scale=0.8] |
0f918507 EK |
2434 | \tikzset{level distance=50pt} |
2435 | \tikzset{sibling distance=5pt} | |
5e5908eb | 2436 | \tikzstyle{thescale}=[scale=0.8] |
e8173df5 | 2437 | \tikzset{every tree node/.style={align=center}} |
5e5908eb EK |
2438 | \tikzset{edge from parent/.append style={thick}} |
2439 | \tikzstyle{inode}=[rectangle,rounded corners,draw,fill=lightgray,drop | |
2440 | shadow,align=center] | |
2441 | \tikzset{every internal node/.style={inode}} | |
e8173df5 EK |
2442 | \tikzset{every leaf node/.style={draw=none,fill=none}} |
2443 | ||
e601ce99 EK |
2444 | \Tree [.\type{CompilationUnit} [.\type{[ PackageDeclaration ]} [.\type{Name} ] |
2445 | [.\type{\{ Annotation \}*} ] ] | |
2446 | [.\type{\{ ImportDeclaration \}*} [.\type{Name} ] ] | |
0f918507 | 2447 | [.\type{\{ AbstractTypeDeclaration \}+} [.\node(site){\type{\{ |
e601ce99 | 2448 | BodyDeclaration \}*}}; ] [.\type{SimpleName} ] ] |
e8173df5 | 2449 | ] |
e601ce99 | 2450 | \begin{scope}[shift={(0.5,-6)}] |
5e5908eb | 2451 | \node[inode,thescale](root){\type{MethodDeclaration}}; |
e601ce99 | 2452 | \node[inode,thescale](modifiers) at (4.5,-5){\type{\{ IExtendedModifier \}*} |
5e5908eb | 2453 | \\ {\footnotesize (Of type \type{Modifier} or \type{Annotation})}}; |
e601ce99 | 2454 | \node[inode,thescale](typeParameters) at (-6,-3.5){\type{\{ TypeParameter |
5e5908eb | 2455 | \}*}}; |
fbeec228 | 2456 | \node[inode,thescale](parameters) at (-5,-5){\type{\{ |
5e5908eb | 2457 | SingleVariableDeclaration \}*} \\ {\footnotesize (Parameters)}}; |
e601ce99 | 2458 | \node[inode,thescale](exceptions) at (5,-3){\type{\{ Name \}*} \\ |
5e5908eb | 2459 | {\footnotesize (Exceptions)}}; |
e601ce99 | 2460 | \node[inode,thescale](return) at (-6.5,-2){\type{Type} \\ {\footnotesize |
5e5908eb | 2461 | (Return type)}}; |
e601ce99 EK |
2462 | \begin{scope}[shift={(0,-5)}] |
2463 | \Tree [.\node(body){\type{[ Block ]} \\ {\footnotesize (Body)}}; | |
2464 | [.\type{\{ Statement \}*} [.\type{\{ Expression \}*} ] | |
2465 | [.\type{\{ Statement \}*} [.\type{\ldots} ]] | |
2466 | ] | |
2467 | ] | |
2468 | \end{scope} | |
0f918507 | 2469 | \end{scope} |
e601ce99 EK |
2470 | \draw[->,>=triangle 90,shorten >=1pt](root.east)..controls +(east:2) and |
2471 | +(south:1)..(site.south); | |
0f918507 | 2472 | |
5e5908eb EK |
2473 | \draw (root.south) -- (modifiers); |
2474 | \draw (root.south) -- (typeParameters); | |
2475 | \draw (root.south) -- ($ (parameters.north) + (2,0) $); | |
2476 | \draw (root.south) -- (exceptions); | |
2477 | \draw (root.south) -- (return); | |
2478 | \draw (root.south) -- (body); | |
2479 | ||
e8173df5 | 2480 | \end{tikzpicture} |
fe0a4c48 | 2481 | \caption{The format of the abstract syntax tree in \name{Eclipse}.} |
e8173df5 EK |
2482 | \label{fig:astEclipse} |
2483 | \end{figure} | |
94deee9e | 2484 | \todoin{Add more to the AST format tree? \myref{fig:astEclipse}} |
a2868580 | 2485 | |
b8fce5af | 2486 | \section{The ASTVisitor}\label{astVisitor} |
3ab3e132 EK |
2487 | So far, the only thing that has been addressed is how the data that is going to |
2488 | be the basis for our analysis is structured. Another aspect of it is how we are | |
2489 | going to traverse the AST to gather the information we need, so we can conclude | |
2490 | about the properties we are analysing. It is of course possible to start at the | |
2491 | top of the tree, and manually search through its nodes for the ones we are | |
2492 | looking for, but that is a bit inconvenient. To be able to efficiently utilize | |
2493 | such an approach, we would need to make our own framework for traversing the | |
2494 | tree and visiting only the types of nodes we are after. Luckily, this | |
fe0a4c48 | 2495 | functionality is already provided in \name{Eclipse}, by its |
50976f51 EK |
2496 | \typewithref{org.eclipse.jdt.core.dom}{ASTVisitor}. |
2497 | ||
fe0a4c48 EK |
2498 | The \name{Eclipse} AST, together with its \type{ASTVisitor}, follows the |
2499 | \pattern{Visitor} pattern\citing{designPatterns}. The intent of this design | |
2500 | pattern is to facilitate extending the functionality of classes without touching | |
2501 | the classes themselves. | |
0a8ca90c | 2502 | |
fe0a4c48 EK |
2503 | Let us say that there is a class hierarchy of elements. These elements all have |
2504 | a method \method{accept(Visitor visitor)}. In its simplest form, the | |
0a8ca90c EK |
2505 | \method{accept} method just calls the \method{visit} method of the visitor with |
2506 | itself as an argument, like this: \code{visitor.visit(this)}. For the visitors | |
2507 | to be able to extend the functionality of all the classes in the elements | |
2508 | hierarchy, each \type{Visitor} must have one visit method for each concrete | |
2509 | class in the hierarchy. Say the hierarchy consists of the concrete classes | |
2510 | \type{ConcreteElementA} and \type{ConcreteElementB}. Then each visitor must have | |
2511 | the (possibly empty) methods \method{visit(ConcreteElementA element)} and | |
2512 | \method{visit(ConcreteElementB element)}. This scenario is depicted in | |
2513 | \myref{fig:visitorPattern}. | |
50976f51 | 2514 | |
3572a8ac EK |
2515 | \begin{figure}[h] |
2516 | \centering | |
2517 | \tikzstyle{abstract}=[rectangle, draw=black, fill=white, drop shadow, text | |
2518 | centered, anchor=north, text=black, text width=6cm, every one node | |
2519 | part/.style={align=center, font=\bfseries\itshape}] | |
2520 | \tikzstyle{concrete}=[rectangle, draw=black, fill=white, drop shadow, text | |
2521 | centered, anchor=north, text=black, text width=6cm] | |
2522 | \tikzstyle{inheritarrow}=[->, >=open triangle 90, thick] | |
2523 | \tikzstyle{commentarrow}=[->, >=angle 90, dashed] | |
2524 | \tikzstyle{line}=[-, thick] | |
2525 | \tikzset{every one node part/.style={align=center, font=\bfseries}} | |
2526 | \tikzset{every second node part/.style={align=center, font=\ttfamily}} | |
3572a8ac EK |
2527 | |
2528 | \begin{tikzpicture}[node distance=1cm, scale=0.8, every node/.style={transform | |
2529 | shape}] | |
2530 | \node (Element) [abstract, rectangle split, rectangle split parts=2] | |
2531 | { | |
2532 | \nodepart{one}{Element} | |
2533 | \nodepart{second}{+accept(visitor: Visitor)} | |
2534 | }; | |
2535 | \node (AuxNode01) [text width=0, minimum height=2cm, below=of Element] {}; | |
2536 | \node (ConcreteElementA) [concrete, rectangle split, rectangle split | |
2537 | parts=2, left=of AuxNode01] | |
2538 | { | |
2539 | \nodepart{one}{ConcreteElementA} | |
2540 | \nodepart{second}{+accept(visitor: Visitor)} | |
2541 | }; | |
2542 | \node (ConcreteElementB) [concrete, rectangle split, rectangle split | |
2543 | parts=2, right=of AuxNode01] | |
2544 | { | |
2545 | \nodepart{one}{ConcreteElementB} | |
2546 | \nodepart{second}{+accept(visitor: Visitor)} | |
2547 | }; | |
2548 | ||
2549 | \node[comment, below=of ConcreteElementA] (CommentA) {visitor.visit(this)}; | |
2550 | ||
2551 | \node[comment, below=of ConcreteElementB] (CommentB) {visitor.visit(this)}; | |
2552 | ||
2553 | \node (AuxNodeX) [text width=0, minimum height=1cm, below=of AuxNode01] {}; | |
2554 | ||
2555 | \node (Visitor) [abstract, rectangle split, rectangle split parts=2, | |
2556 | below=of AuxNodeX] | |
2557 | { | |
2558 | \nodepart{one}{Visitor} | |
2559 | \nodepart{second}{+visit(ConcreteElementA)\\+visit(ConcreteElementB)} | |
2560 | }; | |
2561 | \node (AuxNode02) [text width=0, minimum height=2cm, below=of Visitor] {}; | |
2562 | \node (ConcreteVisitor1) [concrete, rectangle split, rectangle split | |
2563 | parts=2, left=of AuxNode02] | |
2564 | { | |
2565 | \nodepart{one}{ConcreteVisitor1} | |
2566 | \nodepart{second}{+visit(ConcreteElementA)\\+visit(ConcreteElementB)} | |
2567 | }; | |
2568 | \node (ConcreteVisitor2) [concrete, rectangle split, rectangle split | |
2569 | parts=2, right=of AuxNode02] | |
2570 | { | |
2571 | \nodepart{one}{ConcreteVisitor2} | |
2572 | \nodepart{second}{+visit(ConcreteElementA)\\+visit(ConcreteElementB)} | |
2573 | }; | |
2574 | ||
2575 | ||
2576 | \draw[inheritarrow] (ConcreteElementA.north) -- ++(0,0.7) -| | |
2577 | (Element.south); | |
2578 | \draw[line] (ConcreteElementA.north) -- ++(0,0.7) -| | |
2579 | (ConcreteElementB.north); | |
2580 | ||
2581 | \draw[inheritarrow] (ConcreteVisitor1.north) -- ++(0,0.7) -| | |
2582 | (Visitor.south); | |
2583 | \draw[line] (ConcreteVisitor1.north) -- ++(0,0.7) -| | |
2584 | (ConcreteVisitor2.north); | |
2585 | ||
2586 | \draw[commentarrow] (CommentA.north) -- (ConcreteElementA.south); | |
2587 | \draw[commentarrow] (CommentB.north) -- (ConcreteElementB.south); | |
2588 | ||
2589 | ||
2590 | \end{tikzpicture} | |
2591 | \caption{The Visitor Pattern.} | |
2592 | \label{fig:visitorPattern} | |
2593 | \end{figure} | |
2594 | ||
0a8ca90c EK |
2595 | The use of the visitor pattern can be appropriate when the hierarchy of elements |
2596 | is mostly stable, but the family of operations over its elements is constantly | |
fe0a4c48 | 2597 | growing. This is clearly the case for the \name{Eclipse} AST, since the hierarchy of |
0a8ca90c EK |
2598 | type \type{ASTNode} is very stable, but the functionality of its elements is |
2599 | extended every time someone needs to operate on the AST. Another aspect of the | |
fe0a4c48 | 2600 | \name{Eclipse} implementation is that it is a public API, and the visitor pattern is an |
0a8ca90c EK |
2601 | easy way to provide access to the nodes in the tree. |
2602 | ||
fe0a4c48 | 2603 | The version of the visitor pattern implemented for the AST nodes in \name{Eclipse} also |
0a8ca90c EK |
2604 | provides an elegant way to traverse the tree. It does so by following the |
2605 | convention that every node in the tree first let the visitor visit itself, | |
b3adff95 EK |
2606 | before it also makes all its children accept the visitor. The children are only |
2607 | visited if the visit method of their parent returns \var{true}. This pattern | |
2608 | then makes for a prefix traversal of the AST. If postfix traversal is desired, | |
2609 | the visitors also has \method{endVisit} methods for each node type, that is | |
2610 | called after the \method{visit} method for a node. In addition to these visit | |
2611 | methods, there are also the methods \method{preVisit(ASTNode)}, | |
2612 | \method{postVisit(ASTNode)} and \method{preVisit2(ASTNode)}. The | |
2613 | \method{preVisit} method is called before the type-specific \method{visit} | |
2614 | method. The \method{postVisit} method is called after the type-specific | |
2615 | \method{endVisit}. The type specific \method{visit} is only called if | |
2616 | \method{preVisit2} returns \var{true}. Overriding the \method{preVisit2} is also | |
2617 | altering the behavior of \method{preVisit}, since the default implementation is | |
94c59647 EK |
2618 | responsible for calling it. |
2619 | ||
2620 | An example of a trivial \type{ASTVisitor} is shown in | |
2621 | \myref{lst:astVisitorExample}. | |
2622 | ||
2623 | \begin{listing} | |
2624 | \begin{minted}{java} | |
2625 | public class CollectNamesVisitor extends ASTVisitor { | |
2626 | Collection<Name> names = new LinkedList<Name>(); | |
2627 | ||
2628 | @Override | |
2629 | public boolean visit(QualifiedName node) { | |
2630 | names.add(node); | |
2631 | return false; | |
2632 | } | |
2633 | ||
2634 | @Override | |
2635 | public boolean visit(SimpleName node) { | |
2636 | names.add(node); | |
2637 | return true; | |
2638 | } | |
2639 | } | |
2640 | \end{minted} | |
2641 | \caption{An \type{ASTVisitor} that visits all the names in a subtree and adds | |
2642 | them to a collection, except those names that are children of any | |
2643 | \type{QualifiedName}.} | |
2644 | \label{lst:astVisitorExample} | |
2645 | \end{listing} | |
2646 | ||
b8fce5af EK |
2647 | \section{Property collectors}\label{propertyCollectors} |
2648 | The prefixes and unfixes are found by property | |
2649 | collectors\typeref{no.uio.ifi.refaktor.extractors.collectors.PropertyCollector}. | |
2650 | A property collector is of the \type{ASTVisitor} type, and thus visits nodes of | |
2651 | type \type{ASTNode} of the abstract syntax tree \see{astVisitor}. | |
2652 | ||
2653 | \subsection{The PrefixesCollector} | |
2654 | The \typewithref{no.uio.ifi.refaktor.extractors.collectors}{PrefixesCollector} | |
ccd252c5 | 2655 | finds prefixes that makes up the basis for calculating move targets for the |
fe0a4c48 | 2656 | \refa{Extract and Move Method} refactoring. It visits expression |
b8fce5af EK |
2657 | statements\typeref{org.eclipse.jdt.core.dom.ExpressionStatement} and creates |
2658 | prefixes from its expressions in the case of method invocations. The prefixes | |
2659 | found is registered with a prefix set, together with all its sub-prefixes. | |
2660 | ||
2661 | \subsection{The UnfixesCollector}\label{unfixes} | |
2662 | The \typewithref{no.uio.ifi.refaktor.extractors.collectors}{UnfixesCollector} | |
2663 | finds unfixes within a selection. That is prefixes that cannot be used as a | |
2664 | basis for finding a move target in a refactoring. | |
2665 | ||
2666 | An unfix can be a name that is assigned to within a selection. The reason that | |
2667 | this cannot be allowed, is that the result would be an assignment to the | |
2668 | \type{this} keyword, which is not valid in Java \see{eclipse_bug_420726}. | |
2669 | ||
2670 | Prefixes that originates from variable declarations within the same selection | |
2671 | are also considered unfixes. This is because when a method is moved, it needs to | |
2672 | be called through a variable. If this variable is also within the method that is | |
2673 | to be moved, this obviously cannot be done. | |
2674 | ||
2675 | Also considered as unfixes are variable references that are of types that is not | |
2676 | suitable for moving a methods to. This can be either because it is not | |
2677 | physically possible to move the method to the desired class or that it will | |
2678 | cause compilation errors by doing so. | |
2679 | ||
2680 | If the type binding for a name is not resolved it is considered and unfix. The | |
2681 | same applies to types that is only found in compiled code, so they have no | |
2682 | underlying source that is accessible to us. (E.g. the \type{java.lang.String} | |
2683 | class.) | |
2684 | ||
2685 | Interfaces types are not suitable as targets. This is simply because interfaces | |
3ab3e132 | 2686 | in Java cannot contain methods with bodies. (This thesis does not deal with |
b8fce5af EK |
2687 | features of Java versions later than Java 7. Java 8 has interfaces with default |
2688 | implementations of methods.) Neither are local types allowed. This accounts for | |
2689 | both local and anonymous classes. Anonymous classes are effectively the same as | |
2690 | interface types with respect to unfixes. Local classes could in theory be used | |
2691 | as targets, but this is not possible due to limitations of the implementation of | |
fe0a4c48 | 2692 | the \refa{Extract and Move Method} refactoring. The problem is that the refactoring is |
b8fce5af EK |
2693 | done in two steps, so the intermediate state between the two refactorings would |
2694 | not be legal Java code. In the case of local classes, the problem is that, in | |
2695 | the intermediate step, a selection referencing a local class would need to take | |
2696 | the local class as a parameter if it were to be extracted to a new method. This | |
2697 | new method would need to live in the scope of the declaring class of the | |
2698 | originating method. The local class would then not be in the scope of the | |
2699 | extracted method, thus bringing the source code into an illegal state. One could | |
2700 | imagine that the method was extracted and moved in one operation, without an | |
2701 | intermediate state. Then it would make sense to include variables with types of | |
2702 | local classes in the set of legal targets, since the local classes would then be | |
2703 | in the scopes of the method calls. If this makes any difference for software | |
2704 | metrics that measure coupling would be a different discussion. | |
2705 | ||
2706 | \begin{listing} | |
2707 | \begin{multicols}{2} | |
2708 | \begin{minted}[]{java} | |
2709 | // Before | |
2710 | void declaresLocalClass() { | |
2711 | class LocalClass { | |
2712 | void foo() {} | |
2713 | void bar() {} | |
2714 | } | |
2715 | ||
2716 | LocalClass inst = | |
2717 | new LocalClass(); | |
2718 | inst.foo(); | |
2719 | inst.bar(); | |
2720 | } | |
2721 | \end{minted} | |
2722 | ||
2723 | \columnbreak | |
2724 | ||
2725 | \begin{minted}[]{java} | |
2726 | // After Extract Method | |
2727 | void declaresLocalClass() { | |
2728 | class LocalClass { | |
2729 | void foo() {} | |
2730 | void bar() {} | |
2731 | } | |
2732 | ||
2733 | LocalClass inst = | |
2734 | new LocalClass(); | |
2735 | fooBar(inst); | |
2736 | } | |
2737 | ||
2738 | // Intermediate step | |
2739 | void fooBar(LocalClass inst) { | |
2740 | inst.foo(); | |
2741 | inst.bar(); | |
2742 | } | |
2743 | \end{minted} | |
2744 | \end{multicols} | |
fe0a4c48 | 2745 | \caption{When \refa{Extract and Move Method} tries to use a variable with a local type |
b8fce5af EK |
2746 | as the move target, an intermediate step is taken that is not allowed. Here: |
2747 | \type{LocalClass} is not in the scope of \method{fooBar} in its intermediate | |
2748 | location.} | |
2749 | \label{lst:extractMethod_LocalClass} | |
2750 | \end{listing} | |
2751 | ||
2752 | The last class of names that are considered unfixes is names used in null tests. | |
2753 | These are tests that reads like this: if \texttt{<name>} equals \var{null} then | |
2754 | do something. If allowing variables used in those kinds of expressions as | |
2755 | targets for moving methods, we would end up with code containing boolean | |
2756 | expressions like \texttt{this == null}, which would not be meaningful, since | |
2757 | \var{this} would never be \var{null}. | |
2758 | ||
0a8ca90c | 2759 | |
5195bf0c EK |
2760 | \subsection{The ContainsReturnStatementCollector} |
2761 | The | |
2762 | \typewithref{no.uio.ifi.refaktor.analyze.collectors}{ContainsReturnStatementCollector} | |
2763 | is a very simple property collector. It only visits the return statements within | |
2764 | a selection, and can report whether it encountered a return statement or not. | |
2765 | ||
b8d069e4 EK |
2766 | \subsection{The LastStatementCollector} |
2767 | The \typewithref{no.uio.ifi.refaktor.analyze.collectors}{LastStatementCollector} | |
2768 | collects the last statement of a selection. It does so by only visiting the top | |
2769 | level statements of the selection, and compares the textual end offset of each | |
3ab3e132 | 2770 | encountered statement with the end offset of the previous statement found. |
b8d069e4 | 2771 | |
95c0f364 | 2772 | \section{Checkers}\label{checkers} |
801ff00a | 2773 | \todoin{Check out ExtractMethodAnalyzer from ExtractMethodRefactoring} |
d6f8e65a EK |
2774 | The checkers are a range of classes that checks that text selections complies |
2775 | with certain criteria. All checkers operates under the assumption that the code | |
2776 | they check is free from compilation errors. If a | |
95c0f364 EK |
2777 | \typewithref{no.uio.ifi.refaktor.analyze.analyzers}{Checker} fails, it throws a |
2778 | \type{CheckerException}. The checkers are managed by the | |
2779 | \type{LegalStatementsChecker}, which does not, in fact, implement the | |
2780 | \type{Checker} interface. It does, however, run all the checkers registered with | |
2781 | it, and reports that all statements are considered legal if no | |
08cbba3b | 2782 | \type{CheckerException} is thrown. Many of the checkers either extends the |
f72f72f1 | 2783 | \type{PropertyCollector} or utilizes one or more property collectors to verify |
3ab3e132 | 2784 | some criteria. The checkers registered with the \type{LegalStatementsChecker} |
f72f72f1 | 2785 | are described next. They are run in the order presented below. |
95c0f364 | 2786 | |
a22915d0 EK |
2787 | \subsection{The CallToProtectedOrPackagePrivateMethodChecker} |
2788 | This checker is designed to prevent an error that can occur in situations where | |
2789 | a method is declared in one class, but overridden in another. If a text | |
2790 | selection contains a call to a method like this, and the seletion is extracted | |
2791 | to a new method, the subsequent movement of this method could cause the code to | |
2792 | break. | |
2793 | ||
2794 | The code breaks in situations where the method call in the selection is to a | |
2795 | method that has the \code{protected} modifier, or it does not have any access | |
2796 | modifiers, i.e. it is package-private. The method is not public, so the | |
2797 | \MoveMethod refactoring must make it public, making the moved method able to | |
2798 | call it from its new location. The problem is that the, now public, method is | |
2799 | overridden in a subclass, where it has a protected or package-private status. | |
2800 | This makes the compiler complain that the subclass is trying to reduce the | |
2801 | visibility of a method declared in its superclass. This is not allowed in Java, | |
2802 | and for good reasons. It would make it possible to make a subclass that could | |
2803 | not be a substitute for its superclass. | |
2804 | ||
2805 | The workings of the \type{CallToProtectedOrPackagePrivateMethod\-Checker} is | |
2806 | therefore very simple. It looks for calls to methods that are either protected | |
2807 | or package-private within the selection, and throws an | |
2808 | \type{IllegalExpressionFoundException} if one is found. | |
2809 | ||
2810 | The problem this checker helps to avoid, is a little subtle. The problem does | |
2811 | not arise in the class where the change is done, but in a class derived from it. | |
2812 | This shows that classes acting as superclasses are especially fragile to | |
2813 | introducing errors in the context of automated refactoring. This is also shown | |
2814 | in bug\ldots \todoin{File Eclipse bug report} | |
2815 | ||
2a4b8dea EK |
2816 | \subsection{The InstantiationOfNonStaticInnerClassChecker} |
2817 | When a non-static inner class is instatiated, this must happen in the scope of | |
2818 | its declaring class. This is because it must have access to the members of the | |
2819 | declaring class. If the inner class is public, it is possible to instantiate it | |
2820 | through an instance of its declaring class, but this is not handled by the | |
2821 | \type{MoveInstanceMethodProcessor} in Eclipse when moving a method. Therefore, | |
2822 | performing a move on a method that instantiates a non-static inner class, will | |
2823 | break the code if the instantiation is not handled properly. For this reason, | |
2824 | the \type{InstantiationOfNonStaticInnerClassChecker} does not validate | |
2825 | selections that contains instantiations of non-static inner classes. This | |
2826 | problem is also related to bug\ldots \todoin{File Eclipse bug report} | |
2827 | ||
8d0caf4c EK |
2828 | \subsection{The EnclosingInstanceReferenceChecker} |
2829 | The purpose of this checker is to verify that the names in a selection is not | |
2830 | referencing any enclosing instances. This is for making sure that all references | |
2831 | is legal in a method that is to be moved. Theoretically, some situations could | |
2832 | be easily solved my passing a reference to the referenced class with the moved | |
2833 | method (e.g. when calling public methods), but the dependency on the | |
2834 | \type{MoveInstanceMethodProcessor} prevents this. | |
2835 | ||
2836 | The | |
2837 | \typewithref{no.uio.ifi.refaktor.analyze.analyzers}{EnclosingInstanceReferenceChecker} | |
2838 | is a modified version of the | |
801ff00a | 2839 | \typewithref{org.eclipse.jdt.internal.corext.refactoring.structure.MoveInstanceMethod\-Processor}{EnclosingInstanceReferenceFinder} |
8d0caf4c EK |
2840 | from the \type{MoveInstanceMethodProcessor}. Wherever the |
2841 | \type{EnclosingInstanceReferenceFinder} would create a fatal error status, the | |
2842 | checker throws a \type{CheckerException}. | |
2843 | ||
2844 | It works by first finding all of the enclosing types of a selection. Thereafter | |
2845 | it visits all its simple names to check that they are not references to | |
2846 | variables or methods declared in any of the enclosing types. In addition the | |
2847 | checker visits \var{this}-expressions to verify that no such expressions is | |
2848 | qualified with any name. | |
2849 | ||
9cc2cd59 | 2850 | \subsection{The ReturnStatementsChecker}\label{returnStatementsChecker} |
801ff00a | 2851 | The checker for return statements is meant to verify that if a text selection |
d6f8e65a EK |
2852 | contains a return statement, then every possible execution path within the |
2853 | selection ends in a return statement. This property is important regarding the | |
2854 | \ExtractMethod refactoring. If it holds, it means that a method could be | |
2855 | extracted from the selection, and a call to it could be substituted for the | |
801ff00a EK |
2856 | selection. If the method has a non-void return type, then a call to it would |
2857 | also be a valid return point for the calling method. If its return value is of | |
2858 | the void type, then the \type{ExtractMethodRefactoring} of \name{Eclipse} | |
2859 | appends an empty return statement to the back of the method call. Therefore, the | |
2860 | analysis does not discriminate on either kinds of return statements, with or | |
2861 | without a return value. | |
2862 | ||
2863 | The property description implies that if the selection is free from return | |
d6f8e65a EK |
2864 | statements, then the checker validates. So this is the first thing the checker |
2865 | investigates. | |
2866 | ||
2867 | If the checker proceedes any further, it is because the selection contains one | |
2868 | or more return statements. The next test is therefore to check if the last | |
801ff00a EK |
2869 | statement of the selection ends in either a return or a throw statement. If the |
2870 | last statement of the selection ends in a return statement, then all execution | |
d6f8e65a EK |
2871 | paths within the selection should end in either this, or another, return |
2872 | statement. This is also true for a throw statement, since it causes an immediate | |
2873 | exit from the current block, together with all outer blocks in its control flow | |
2874 | that does not catch the thrown exception. | |
2875 | ||
2876 | Return statements can be either explicit or implicit. An \emph{explicit} return | |
2877 | statement is formed by using the \code{return} keyword, while an \emph{implicit} | |
2878 | return statement is a statement that is not formed by the \code{return} keyword, | |
801ff00a EK |
2879 | but must be the last statement of a method that can have any side effects. This |
2880 | can happen in methods with a void return type. An example is a statement that is | |
2881 | inside one or more blocks. The last statement of a method could for instance be | |
2882 | an if-statement, but the last statement that is executed in the method, and that | |
2883 | can have any side effects, may be located inside the block of the else part of | |
2884 | the if-statement. | |
2885 | ||
2886 | The responsibility for checking that the last statement of the selection | |
2887 | eventually ends in a return or throw statement, is put on the | |
2888 | \type{LastStatementOfSelectionEndsInReturnOrThrowChecker}. For every node | |
2889 | visited, if it is a statement, it does a test to see if the statement is a | |
2890 | return, a throw or if it is an implicit return statement. If this is the case, | |
2891 | no further checking is done. This checking is done in the \code{preVisit2} | |
2892 | method \see{astVisitor}. If the node is not of a type that is being handled by | |
2893 | its type specific visit method, the checker performs a simple test. If the node | |
2894 | being visited is not the last statement of its parent that is also enclosed by | |
2895 | the selection, an \type{IllegalStatementFoundException} is thrown. This ensures | |
2896 | that all statements are taken care of, one way or the other. It also ensures | |
2897 | that the checker is conservative in the way it checks for legality of the | |
2898 | selection. | |
2899 | ||
2900 | To examine if a statement is an implicit return statement, the checker first | |
2901 | finds the last statement declared in its enclosing method. If this statement is | |
2902 | the same as the one under investigation, it is considered an implicit return | |
2903 | statement. If the statements are not the same, the checker does a search to see | |
2904 | if statement examined is also the last statement of the method that can be | |
2905 | reached. This includes the last statement of a block statement, a labeled | |
2906 | statement, a synchronized statement or a try statement, that in turn is the last | |
2907 | statement enclosed by the statement types listed. This search goes through all | |
2908 | the parents of a statement until a statement is found that is not one of the | |
2909 | mentioned acceptable parent statements. If the search ends in a method | |
2910 | declaration, then the statement is considered to be the last reachable statement | |
2911 | of the method, and thus also an implicit return statement. | |
2912 | ||
2913 | There are two kinds of statements that are handled explicitly. It is | |
2914 | if-statements and try-statements. Block, labeled and do-statements are handled | |
2915 | by fall-through to the other two. Do-statements are considered equal to blocks | |
2916 | in this context, since their bodies are always evaluated at least one time. If- | |
2917 | and try-statements are visited only if they are the last node of their parent | |
2918 | within the selection. | |
2919 | ||
2920 | For if-statements, the rule is that if the then-part does not contain any return | |
2921 | or throw statements, it is considered illegal. If it does contain a return or | |
2922 | throw, its else-part is checked. If the else-part is non-existent, or it does | |
2923 | not contain any return or throw statements, it is considered illegal. If the | |
2924 | statement is not regarded illegal, its children are visited. | |
2925 | ||
2926 | Try-statements are handled much the same way as if-statements. Its body must | |
2927 | contain a return or throw. The same applies to its catch clauses and finally | |
2928 | body. | |
2929 | ||
2930 | If the checker does not complain at any point, the selection is considered valid | |
2931 | with respect to return statements. | |
41cde50e EK |
2932 | |
2933 | \subsection{The AmbiguousReturnValueChecker} | |
9cc2cd59 EK |
2934 | This checker verifies that there are no \emph{ambiguous return statements} in a |
2935 | selection. The problem with ambiguous return statements arise when a selection | |
2936 | is chosen to be extracted into a new method, but it needs to return more than | |
2937 | one value from that method. This problem occurs in two situations. The first | |
2938 | situation arise when there is more than one local variable that is both assigned | |
2939 | to within a selection and also referenced after the selection. The other | |
2940 | situation occur when there is only one such assignment, but there is also one or | |
2941 | more return statements in the selection. | |
2942 | ||
2943 | First the checker needs to collect some data. Those data are the binding keys | |
2944 | for all simple names that are assigned to within the selection, including | |
2945 | variable declarations, but excluding fields. The checker also collects whether | |
2946 | there exists a return statement in the selection or not. No further checks of | |
2947 | return statements are needed, since, at this point, the selection is already | |
2948 | checked for illegal return statements \see{returnStatementsChecker}. | |
2949 | ||
2950 | After the binding keys of the assignees are collected, the checker searches the | |
2951 | part of the enclosing method that is after the selection for references whose | |
3ab3e132 EK |
2952 | binding keys are among the collected keys. If more than one unique referral is |
2953 | found, or only one referral is found, but the selection also contains a return | |
2954 | statement, we have a situation with an ambiguous return value, and an exception | |
2955 | is thrown. | |
9cc2cd59 EK |
2956 | |
2957 | %\todoin{Explain why we do not need to consider variables assigned inside | |
2958 | %local/anonymous classes. (The referenced variables need to be final and so | |
2959 | %on\ldots)} | |
41cde50e EK |
2960 | |
2961 | \subsection{The IllegalStatementsChecker} | |
2962 | This checker is designed to check for illegal statements. | |
2963 | ||
08cbba3b EK |
2964 | Any use of the \var{super} keyword is prohibited, since its meaning is altered |
2965 | when moving a method to another class. | |
2966 | ||
2967 | For a \emph{break} statement, there is two situations to consider: A break | |
2968 | statement with or without a label. If the break statement has a label, it is | |
2969 | checked that whole of the labeled statement is inside the selection. Since a | |
2970 | label does not have any binding information, we have to search upwards in the | |
2971 | AST to find the \type{LabeledStatement} that corresponds to the label from the | |
2972 | break statement, and check that it is contained in the selection. If the break | |
2973 | statement does not have a label attached to it, it is checked that its innermost | |
2974 | enclosing loop or switch statement also is inside the selection. | |
2975 | ||
2976 | The situation for a \emph{continue} statement is the same as for a break | |
2977 | statement, except that it is not allowed inside switch statements. | |
2978 | ||
2979 | Regarding \emph{assignments}, two types of assignments is allowed: Assignment to | |
2980 | a non-final variable and assignment to an array access. All other assignments is | |
2981 | regarded illegal. | |
2982 | ||
2983 | \todoin{Finish\ldots} | |
41cde50e | 2984 | |
356782a0 EK |
2985 | |
2986 | \chapter{Benchmarking} | |
2987 | \todoin{Better name than ``benchmarking''?} | |
fe0a4c48 | 2988 | This part of the master project is located in the \name{Eclipse} project |
356782a0 EK |
2989 | \code{no.uio.ifi.refaktor.benchmark}. The purpose of it is to run the equivalent |
2990 | of the \type{SearchBasedExtractAndMoveMethodChanger} | |
2991 | \see{searchBasedExtractAndMoveMethodChanger} over a larger software project, | |
3ab3e132 | 2992 | both to test its robustness but also its effect on different software metrics. |
356782a0 EK |
2993 | |
2994 | \section{The benchmark setup} | |
fe0a4c48 EK |
2995 | The benchmark itself is set up as a \name{JUnit} test case. This is a convenient |
2996 | setup, and utilizes the \name{JUnit Plugin Test Launcher}. This provides us a | |
2997 | with a fully functional \name{Eclipse} workbench. Most importantly, this gives | |
2998 | us access to the Java Model of \name{Eclipse} \see{javaModel}. | |
356782a0 EK |
2999 | |
3000 | \subsection{The ProjectImporter} | |
3001 | The Java project that is going to be used as the data for the benchmark, must be | |
3002 | imported into the JUnit workspace. This is done by the | |
3003 | \typewithref{no.uio.ifi.refaktor.benchmark}{ProjectImporter}. The importer | |
3004 | require the absolute path to the project description file. It is named | |
3005 | \code{.project} and is located at the root of the project directory. | |
3006 | ||
3007 | The project description is loaded to find the name of the project to be | |
3008 | imported. The project that shall be the destination for the import is created in | |
3009 | the workspace, on the base of the name from the description. Then an import | |
3010 | operation is created, based on both the source and destination information. The | |
3011 | import operation is run to perform the import. | |
3012 | ||
3013 | I have found no simple API call to accomplish what the importer does, which | |
fe0a4c48 EK |
3014 | tells me that it may not be too many people performing this particular action. |
3015 | The solution to the problem was found on \name{Stack | |
356782a0 | 3016 | Overflow}\footnote{\url{https://stackoverflow.com/questions/12401297}}. It |
3ab3e132 | 3017 | contains enough dirty details to be considered inconvenient to use, if not |
356782a0 EK |
3018 | wrapping it in a class like my \type{ProjectImporter}. One would probably have |
3019 | to delve into the source code for the import wizard to find out how the import | |
3020 | operation works, if no one had already done it. | |
3021 | ||
8fe94c0b EK |
3022 | \section{Statistics} |
3023 | Statistics for the analysis and changes is captured by the | |
3024 | \typewithref{no.uio.ifi.refaktor.aspects}{StatisticsAspect}. This an | |
fe0a4c48 | 3025 | \emph{aspect} written in \name{AspectJ}. |
8fe94c0b EK |
3026 | |
3027 | \subsection{AspectJ} | |
fe0a4c48 | 3028 | \name{AspectJ}\footnote{\url{http://eclipse.org/aspectj/}} is an extension to |
8fe94c0b EK |
3029 | the Java language, and facilitates combining aspect-oriented programming with |
3030 | the object-oriented programming in Java. | |
3031 | ||
3032 | Aspect-oriented programming is a programming paradigm that is meant to isolate | |
3033 | so-called \emph{cross-cutting concerns} into their own modules. These | |
dd73ba39 EK |
3034 | cross-cutting concerns are functionalities that spans over multiple classes, but |
3035 | may not belong naturally in any of them. It can be functionality that does not | |
3036 | concern the business logic of an application, and thus may be a burden when | |
3037 | entangled with parts of the source code it does not really belong. Examples | |
3038 | include logging, debugging, optimization and security. | |
8fe94c0b | 3039 | |
416b6888 EK |
3040 | Aspects are interacting with other modules by defining advices. The concept of |
3041 | an \emph{advice} is known from both aspect-oriented and functional | |
3042 | programming\citing{wikiAdvice2014}. It is a function that modifies another | |
3043 | function when the latter is run. An advice in AspectJ is somewhat similar to a | |
3044 | method in Java. It is meant to alter the behavior of other methods, and contains | |
3045 | a body that is executed when it is applied. | |
8fe94c0b EK |
3046 | |
3047 | An advice can be applied at a defined \emph{pointcut}. A pointcut picks out one | |
3048 | or more \emph{join points}. A join point is a well-defined point in the | |
3049 | execution of a program. It can occur when calling a method defined for a | |
3050 | particular class, when calling all methods with the same name, | |
3051 | accessing/assigning to a particular field of a given class and so on. An advice | |
dd73ba39 EK |
3052 | can be declared to run both before, after returning from a pointcut, when there |
3053 | is thrown an exception in the pointcut or after the pointcut either returns or | |
3054 | throws an exception. In addition to picking out join points, a pointcut can | |
3055 | also bind variables from its context, so they can be accessed in the body of an | |
3056 | advice. An example of a pointcut and an advice is found in | |
3057 | \myref{lst:aspectjExample}. | |
8fe94c0b EK |
3058 | |
3059 | \begin{listing}[h] | |
c8088eec | 3060 | \begin{minted}{aspectj} |
8fe94c0b EK |
3061 | pointcut methodAnalyze( |
3062 | SearchBasedExtractAndMoveMethodAnalyzer analyzer) : | |
3063 | call(* SearchBasedExtractAndMoveMethodAnalyzer.analyze()) | |
3064 | && target(analyzer); | |
3065 | ||
3066 | after(SearchBasedExtractAndMoveMethodAnalyzer analyzer) : | |
3067 | methodAnalyze(analyzer) { | |
3068 | statistics.methodCount++; | |
3069 | debugPrintMethodAnalysisProgress(analyzer.method); | |
3070 | } | |
3071 | \end{minted} | |
3072 | \caption{An example of a pointcut named \method{methodAnalyze}, | |
3073 | and an advice defined to be applied after it has occurred.} | |
3074 | \label{lst:aspectjExample} | |
3075 | \end{listing} | |
3076 | ||
dd73ba39 EK |
3077 | \subsection{The Statistics class} |
3078 | The statistics aspect stores statistical information in an object of type | |
3079 | \type{Statistics}. As of now, the aspect needs to be initialized at the point in | |
3080 | time where it is desired that it starts its data gathering. At any point in time | |
3081 | the statistics aspect can be queried for a snapshot of the current statistics. | |
3082 | ||
3083 | The \type{Statistics} class also include functionality for generating a report | |
3084 | of its gathered statistics. The report can be given either as a string or it can | |
3085 | be written to a file. | |
3086 | ||
3087 | \subsection{Advices} | |
3088 | The statistics aspect contains advices for gathering statistical data from | |
3089 | different parts of the benchmarking process. It captures statistics from both | |
3090 | the analysis part and the execution part of the composite \ExtractAndMoveMethod | |
3091 | refactoring. | |
3092 | ||
3093 | For the analysis part, there are advices to count the number of text selections | |
3094 | analyzed and the number of methods, types, compilation units and packages | |
3095 | analyzed. There are also advices that counts for how many of the methods there | |
3096 | is found a selection that is a candidate for the refactoring, and for how many | |
3ab3e132 | 3097 | methods there is not. |
dd73ba39 EK |
3098 | |
3099 | There exists advices for counting both the successful and unsuccessful | |
3100 | executions of all the refactorings. Both for the \ExtractMethod and \MoveMethod | |
3101 | refactorings in isolation, as well as for the combination of them. | |
3102 | ||
8fe94c0b | 3103 | \section{Optimizations} |
41293210 | 3104 | When looking for optimizations to make for the benchmarking process, I used the |
fe0a4c48 | 3105 | \name{VisualVM}\footnote{\url{http://visualvm.java.net/}} for the Java Virtual |
41293210 EK |
3106 | Machine to both profile the application and also to make memory dumps of its |
3107 | heap. | |
8fe94c0b EK |
3108 | |
3109 | \subsection{Caching} | |
41293210 EK |
3110 | When profiling the benchmark process before making any optimizations, it early |
3111 | became apparent that the parsing of source code was a place to direct attention | |
3112 | towards. This discovery was done when only \emph{analyzing} source code, before | |
3113 | trying to do any \emph{manipulation} of it. Caching of the parsed ASTs seemed | |
3114 | like the best way to save some time, as expected. With only a simple cache of | |
3115 | the most recently used AST, the analysis time was speeded up by a factor of | |
3116 | around | |
3117 | 20. This number depends a little upon which type of system the analysis was | |
3118 | run. | |
3119 | ||
3120 | The caching is managed by a cache manager, that now, by default, utilizes the | |
3121 | not so well known feature of Java called a \emph{soft reference}. Soft | |
3122 | references are best explained in the context of weak references. A \emph{weak | |
3123 | reference} is a reference to an object instance that is only guaranteed to | |
3124 | persist as long as there is a \emph{strong reference} or a soft reference | |
3125 | referring the same object. If no such reference is found, its referred object is | |
3126 | garbage collected. A strong reference is basically the same as a regular Java | |
3127 | reference. A soft reference has the same guarantees as a week reference when it | |
3128 | comes to its relation to strong references, but it is not necessarily garbage | |
3129 | collected whenever there exists no strong references to it. A soft reference | |
3130 | \emph{may} reside in memory as long as the JVM has enough free memory in the | |
3131 | heap. A soft reference will therefore usually perform better than a weak | |
3132 | reference when used for simple caching and similar tasks. The way to use a | |
3133 | soft/weak reference is to as it for its referent. The return value then has to | |
3134 | be tested to check that it is not \var{null}. For the basic usage of soft | |
3135 | references, see \myref{lst:softReferenceExample}. For a more thorough | |
3136 | explanation of weak references in general, see\citing{weakRef2006}. | |
3137 | ||
3138 | \begin{listing}[h] | |
3139 | \begin{minted}{java} | |
3140 | // Strong reference | |
3141 | Object strongRef = new Object(); | |
3142 | ||
3143 | // Soft reference | |
3144 | SoftReference<Object> softRef = | |
3145 | new SoftReference<Object>(new Object()); | |
3146 | ||
3147 | // Using the soft reference | |
3148 | Object obj = softRef.get(); | |
3149 | if (obj != null) { | |
3150 | // Use object here | |
3151 | } | |
3152 | \end{minted} | |
3153 | \caption{Showing the basic usage of soft references. Weak references is used the | |
3154 | same way. {\footnotesize (The references are part of the \code{java.lang.ref} | |
3155 | package.)}} | |
3156 | \label{lst:softReferenceExample} | |
3157 | \end{listing} | |
3158 | ||
3159 | The cache based on soft references has no limit for how many ASTs it caches. It | |
3160 | is generally not advisable to keep references to ASTs for prolonged periods of | |
3161 | time, since they are expensive structures to hold on to. For regular plugin | |
fe0a4c48 | 3162 | development, \name{Eclipse} recommends not creating more than one AST at a time to |
41293210 EK |
3163 | limit memory consumption. Since the benchmarking has nothing to do with user |
3164 | experience, and throughput is everything, these advices are intentionally | |
fe0a4c48 | 3165 | ignored. This means that during the benchmarking process, the target \name{Eclipse} |
41293210 EK |
3166 | application may very well work close to its memory limit for the heap space for |
3167 | long periods during the benchmark. | |
8fe94c0b EK |
3168 | |
3169 | \subsection{Memento} | |
d6f8e65a | 3170 | \todoin{Write} |
356782a0 | 3171 | |
3727b75b | 3172 | \chapter{Eclipse Bugs Found} |
94bb49f0 EK |
3173 | \todoin{Add other things and change headline?} |
3174 | ||
3175 | \section{Eclipse bug 420726: Code is broken when moving a method that is | |
0f6e45f8 EK |
3176 | assigning to the parameter that is also the move |
3177 | destination}\label{eclipse_bug_420726} | |
94bb49f0 EK |
3178 | This bug\footnote{\url{https://bugs.eclipse.org/bugs/show\_bug.cgi?id=420726}} |
3179 | was found when analyzing what kinds of names that was to be considered as | |
3727b75b | 3180 | \emph{unfixes} \see{unfixes}. |
94bb49f0 EK |
3181 | |
3182 | \subsection{The bug} | |
3183 | The bug emerges when trying to move a method from one class to another, and when | |
3184 | the target for the move (must be a variable, local or field) is both a parameter | |
fe0a4c48 | 3185 | variable and also is assigned to within the method body. \name{Eclipse} allows this to |
94bb49f0 EK |
3186 | happen, although it is the sure path to a compilation error. This is because we |
3187 | would then have an assignment to a \var{this} expression, which is not allowed | |
3188 | in Java. | |
3189 | ||
3190 | \subsection{The solution} | |
3191 | The solution to this problem is to add all simple names that are assigned to in | |
3192 | a method body to the set of unfixes. | |
128adb4f | 3193 | |
f1b6174d EK |
3194 | \section{Eclipse bug 429416: IAE when moving method from anonymous |
3195 | class}\label{eclipse_bug_429416} | |
128adb4f EK |
3196 | I |
3197 | discovered\footnote{\url{https://bugs.eclipse.org/bugs/show\_bug.cgi?id=429416}} | |
3198 | this bug during a batch change on the \type{org.eclipse.jdt.ui} project. | |
3199 | ||
3200 | \subsection{The bug} | |
fe0a4c48 | 3201 | This bug surfaces when trying to use the \refa{Move Method} refactoring to move a |
94bb49f0 | 3202 | method from an anonymous class to another class. This happens both for my |
fe0a4c48 EK |
3203 | simulation as well as in \name{Eclipse}, through the user interface. It only occurs |
3204 | when \name{Eclipse} analyzes the program and finds it necessary to pass an instance of | |
94bb49f0 | 3205 | the originating class as a parameter to the moved method. I.e. it want to pass a |
128adb4f EK |
3206 | \var{this} expression. The execution ends in an |
3207 | \typewithref{java.lang}{IllegalArgumentException} in | |
3208 | \typewithref{org.eclipse.jdt.core.dom}{SimpleName} and its | |
3209 | \method{setIdentifier(String)} method. The simple name is attempted created in | |
3210 | the method | |
3211 | \methodwithref{org.eclipse.jdt.internal.corext.refactoring.structure.\\MoveInstanceMethodProcessor}{createInlinedMethodInvocation} | |
3212 | so the \type{MoveInstanceMethodProcessor} was early a clear suspect. | |
3213 | ||
3214 | The \method{createInlinedMethodInvocation} is the method that creates a method | |
3215 | invocation where the previous invocation to the method that was moved was. From | |
3216 | its code it can be read that when a \var{this} expression is going to be passed | |
3217 | in to the invocation, it shall be qualified with the name of the original | |
3ab3e132 | 3218 | method's declaring class, if the declaring class is either an anonymous class or |
128adb4f EK |
3219 | a member class. The problem with this, is that an anonymous class does not have |
3220 | a name, hence the term \emph{anonymous} class! Therefore, when its name, an | |
3221 | empty string, is passed into | |
3222 | \methodwithref{org.eclipse.jdt.core.dom.AST}{newSimpleName} it all ends in an | |
3223 | \type{IllegalArgumentException}. | |
3224 | ||
3225 | \subsection{How I solved the problem} | |
3226 | Since the \type{MoveInstanceMethodProcessor} is instantiated in the | |
3227 | \typewithref{no.uio.ifi.refaktor.change.executors}{MoveMethod\-RefactoringExecutor}, | |
3228 | and only need to be a | |
3229 | \typewithref{org.eclipse.ltk.core.refactoring.participants}{MoveProcessor}, I | |
3230 | was able to copy the code for the original move processor and modify it so that | |
3231 | it works better for me. It is now called | |
f1b6174d | 3232 | \typewithref{no.uio.ifi.refaktor.change.processors}{ModifiedMoveInstanceMethodProcessor}. |
128adb4f EK |
3233 | The only modification done (in addition to some imports and suppression of |
3234 | warnings), is in the \method{createInlinedMethodInvocation}. When the declaring | |
3235 | class of the method to move is anonymous, the \var{this} expression in the | |
3236 | parameter list is not qualified with the declaring class' (empty) name. | |
3237 | ||
3727b75b EK |
3238 | \section{Eclipse bug 429954: Extracting statement with reference to local type |
3239 | breaks code}\label{eclipse_bug_429954} | |
a6415293 EK |
3240 | The bug\footnote{\url{https://bugs.eclipse.org/bugs/show\_bug.cgi?id=429954}} |
3241 | was discovered when doing some changes to the way unfixes is computed. | |
3727b75b EK |
3242 | |
3243 | \subsection{The bug} | |
fe0a4c48 | 3244 | The problem is that \name{Eclipse} is allowing selections that references variables of |
3727b75b EK |
3245 | local types to be extracted. When this happens the code is broken, since the |
3246 | extracted method must take a parameter of a local type that is not in the | |
3247 | methods scope. The problem is illustrated in | |
3248 | \myref{lst:extractMethod_LocalClass}, but there in another setting. | |
3249 | ||
3250 | \subsection{Actions taken} | |
3251 | There are no actions directly springing out of this bug, since the Extract | |
a6415293 | 3252 | Method refactoring cannot be meant to be this way. This is handled on the |
fe0a4c48 | 3253 | analysis stage of our \refa{Extract and Move Method} refactoring. So names representing |
3727b75b EK |
3254 | variables of local types is considered unfixes \see{unfixes}. |
3255 | \todoin{write more when fixing this in legal statements checker} | |
3256 | ||
0d7fbd88 EK |
3257 | \chapter{Related Work} |
3258 | ||
3259 | \section{The compositional paradigm of refactoring} | |
3260 | This paradigm builds upon the observation of Vakilian et | |
3261 | al.\citing{vakilian2012}, that of the many automated refactorings existing in | |
3262 | modern IDEs, the simplest ones are dominating the usage statistics. The report | |
fe0a4c48 | 3263 | mainly focuses on \name{Eclipse} as the tool under investigation. |
0d7fbd88 EK |
3264 | |
3265 | The paradigm is described almost as the opposite of automated composition of | |
3266 | refactorings \see{compositeRefactorings}. It works by providing the programmer | |
3267 | with easily accessible primitive refactorings. These refactorings shall be | |
3268 | accessed via keyboard shortcuts or quick-assist menus\footnote{Think | |
fe0a4c48 | 3269 | quick-assist with Ctrl+1 in \name{Eclipse}} and be promptly executed, opposed to in the |
3ab3e132 | 3270 | currently dominating wizard-based refactoring paradigm. They are meant to |
0d7fbd88 EK |
3271 | stimulate composing smaller refactorings into more complex changes, rather than |
3272 | doing a large upfront configuration of a wizard-based refactoring, before | |
3273 | previewing and executing it. The compositional paradigm of refactoring is | |
3274 | supposed to give control back to the programmer, by supporting \himher with an | |
3275 | option of performing small rapid changes instead of large changes with a lesser | |
3276 | degree of control. The report authors hope this will lead to fewer unsuccessful | |
3277 | refactorings. It also could lower the bar for understanding the steps of a | |
3278 | larger composite refactoring and thus also help in figuring out what goes wrong | |
3279 | if one should choose to op in on a wizard-based refactoring. | |
3280 | ||
3281 | Vakilian and his associates have performed a survey of the effectiveness of the | |
3282 | compositional paradigm versus the wizard-based one. They claim to have found | |
3283 | evidence of that the \emph{compositional paradigm} outperforms the | |
3284 | \emph{wizard-based}. It does so by reducing automation, which seem | |
3285 | counterintuitive. Therefore they ask the question ``What is an appropriate level | |
3286 | of automation?'', and thus questions what they feel is a rush toward more | |
3287 | automation in the software engineering community. | |
3288 | ||
3289 | ||
9ff90080 | 3290 | \backmatter{} |
fe0a4c48 | 3291 | \printglossaries |
9ff90080 | 3292 | \printbibliography |
055dca93 | 3293 | \listoftodos |
9ff90080 | 3294 | \end{document} |