evildojo

Programming Tutorials

C

003 - Hello, World! LLVM IR: Default vs -Os

Contents


Introduction


Hello World Default LLVM IR

Assuming your source file is located in hello.c and the output is to be in hello.ll:

$ clang -S -emit-llvm hello.c -o hello.ll

This produces:

; ModuleID = 'hello.c'
source_filename = "hello.c"
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.13.0"

@.str = private unnamed_addr constant [12 x i8] c"helloworld\0A\00", align 1

; Function Attrs: noinline nounwind ssp uwtable
define i32 @main() #0 {
  %1 = alloca i32, align 4
  store i32 0, i32* %1, align 4
  %2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0))
  ret i32 0
}

declare i32 @printf(i8*, ...) #1

attributes #0 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }

!llvm.module.flags = !{!0}
!llvm.ident = !{!1}

!0 = !{i32 1, !"PIC Level", i32 2}
!1 = !{!"Apple LLVM version 9.0.0 (clang-900.0.39.2)"}

Hello World -Os LLVM IR

Assuming your source file is located in hello.c and the output is to be in hello.ll:

$ clang -Os -S -emit-llvm hello.c -o hello.ll

This produces:

; ModuleID = 'hello.c'
source_filename = "hello.c"
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.13.0"

@str = private unnamed_addr constant [11 x i8] c"helloworld\00"

; Function Attrs: nounwind optsize ssp uwtable
define i32 @main() local_unnamed_addr #0 {
  %1 = tail call i32 @puts(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @str, i64 0, i64 0))
  ret i32 0
}

; Function Attrs: nounwind
declare i32 @puts(i8* nocapture readonly) #1

attributes #0 = { nounwind optsize ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind }

!llvm.module.flags = !{!0}
!llvm.ident = !{!1}

!0 = !{i32 1, !"PIC Level", i32 2}
!1 = !{!"Apple LLVM version 9.0.0 (clang-900.0.39.2)"}

Comparison

One major difference right away that I notice is in the string declaration for "helloworld":

@.str = private unnamed_addr constant [12 x i8] c"helloworld\0A\00", align 1    
@str = private unnamed_addr constant [11 x i8] c"helloworld\00"

In the default code generation, our string is declared as an array of 12 8-bit integers ([12 x i8]): the ten ASCII characters of "helloworld", a newline (\0A), and a terminating NUL byte (\00).

In the code optimized for size, our string is declared as an array of only 11 bytes: the same ten characters plus the terminating NUL, with the newline omitted. Why was it generated that way? Let's look at the two versions of main:

; Unoptimized main: reserves a stack slot, zeroes it, calls printf, returns 0.
define i32 @main() #0 {
    ; Reserve stack space for one i32; %1 is a pointer to that slot.
    %1 = alloca i32, align 4
    ; Write 0 into the slot. Nothing in this function reads it back.
    store i32 0, i32* %1, align 4
    ; Call the variadic printf. The getelementptr with indices 0, 0 yields an
    ; i8* to the first byte of the [12 x i8] constant @.str. The i32 result is
    ; placed in %2 and is not used afterwards.
    %2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0))
    ; Return 0 as the result of main.
    ret i32 0
}

; Size-optimized main: a single call to puts, then return 0.
define i32 @main() local_unnamed_addr #0 {
  ; Call puts with an i8* to the first byte of the [11 x i8] constant @str
  ; (getelementptr indices 0, 0). The call carries the `tail` marker; the i32
  ; result is placed in %1 and is not used afterwards.
  %1 = tail call i32 @puts(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @str, i64 0, i64 0))
  ; Return 0 as the result of main.
  ret i32 0
}

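In the main for the default generation, we have the following instructions taking place:

  1. alloca reserves stack space for an i32 and names the pointer %1
  2. store writes 0 into that space
  3. call invokes printf with a pointer to the first byte of @.str
  4. ret returns 0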

In the size-optimized code, we have:

  1. tail call invokes puts with a pointer to the first byte of @str
  2. ret returns 0

So, we have some important differences now:

  1. In the default, "helloworld\n" gets stored vs. "helloworld" (no newline) in the size-optimized version
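  2. In the default, we make a call to printf vs. a call to puts. Because puts writes a trailing newline itself, the newline can be dropped from the string and the output stays the same.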
%2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0))
%1 = tail call i32 @puts(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @str, i64 0, i64 0))

I am now curious as to the differences between how we call printf vs. puts.

I also have questions about registers.

I am also curious about the difference between "tail call" and a regular old "call".

What is "getelementptr"?

What is "inbounds"?

As a new student of LLVM IR, these are questions that I have.


Comparison with LLVM.org code:

; Declare the string constant as a global constant.
@.str = private unnamed_addr constant [13 x i8] c"hello world\0A\00"

; External declaration of the puts function
declare i32 @puts(i8* nocapture) nounwind

; Definition of main function: takes no arguments and returns an i32.
define i32 @main() {   ; i32()*
  ; Convert [13 x i8]* to i8*: indices 0, 0 select the first byte of @.str.
  %cast210 = getelementptr [13 x i8], [13 x i8]* @.str, i64 0, i64 0

  ; Call puts function to write out the string to stdout.
  ; The i32 it returns is not bound to a name and is discarded.
  call i32 @puts(i8* %cast210)
  ; Return 0 as the result of main.
  ret i32 0
}

; Named metadata
!0 = !{i32 42, null, !"string"}
!foo = !{!0}

Notice how in this one, %cast210 is assigned the result of a separate getelementptr instruction, which is the same kind of expression that the optimized code passes inline as the argument to puts. The main function is one instruction longer than the optimized one for this reason, but the code looks a lot cleaner and is easier to read.