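Using your favorite programming language, develop a simulation of a single-digit decimal adder that operates in the same manner as in Babbage’s Analytical Engine. First, prompt the user for two digits in the range 0-9: the addend and the accumulator. Display the addend, the accumulator, and the carry, which is initially zero. Perform a series of cycles as follows: (a) If the addend is zero, display the values and terminate the program. (b) Decrement the addend and increment the accumulator. (c) If the accumulator incremented from 9 to 0, increment the carry. (d) Go back to step (a).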
Test your code with these sums: 0+0, 0+1, 1+0, 1+2, 5+5, 9+1, and 9+9.
The Ex__1_single_digit_adder.py
Python file contains the adder code:
#!/usr/bin/env python
"""Ex__1_single_digit_adder.py: Answer to Ch 1 Ex 1."""
import sys
# Perform one step of the Analytical Engine addition
# operation. a and b are the digits being added, c is the
# carry
def increment_adder(a, b, c):
    """Run one add cycle, moving a single unit from the addend to the accumulator.

    Args:
        a: Addend digit; decremented by one.
        b: Accumulator digit; incremented by one, wrapping from 9 to 0.
        c: Carry count; incremented when the accumulator wraps to 0.

    Returns:
        The updated (a, b, c) tuple.
    """
    a = a - 1            # Decrement addend
    b = (b + 1) % 10     # Increment accum, wrap to 0 if necessary
    if b == 0:           # Accumulator wrapped from 9 to 0: increment carry
        c = c + 1
    return a, b, c
# Add two decimal digits passed as arguments.
# The sum is returned as digit2 and the carry is 0 or 1.
def add_digits(digit1, digit2):
    """Add two decimal digits using repeated Analytical Engine add cycles.

    Args:
        digit1: Addend digit (0-9); counted down to zero.
        digit2: Accumulator digit (0-9); counted up once per cycle.

    Returns:
        A (sum_digit, carry) tuple. For inputs in the range 0-9 the carry
        is 0 or 1 because the accumulator can wrap at most once.
    """
    carry = 0
    # One cycle per unit of the addend; nothing happens if it is already 0.
    while digit1 > 0:
        digit1, digit2, carry = increment_adder(digit1, digit2, carry)
    return digit2, carry
The Ex__1_test_single_digit_adder.py
file contains the test code:
#!/usr/bin/env python
"""Ex__1_test_single_digit_adder.py: Tests for answer to
chapter 1 exercise 1."""
import unittest
import Ex__1_single_digit_adder
class TestSingleDigitAdder(unittest.TestCase):
    """Check add_digits against the sums listed in the exercise.

    Each expected value is a (sum_digit, carry) tuple.
    """

    def test_1(self):
        self.assertEqual(
            Ex__1_single_digit_adder.add_digits(0, 0), (0, 0))

    def test_2(self):
        self.assertEqual(
            Ex__1_single_digit_adder.add_digits(0, 1), (1, 0))

    def test_3(self):
        self.assertEqual(
            Ex__1_single_digit_adder.add_digits(1, 0), (1, 0))

    def test_4(self):
        self.assertEqual(
            Ex__1_single_digit_adder.add_digits(1, 2), (3, 0))

    def test_5(self):
        # 5 + 5 wraps the accumulator to 0 and produces a carry
        self.assertEqual(
            Ex__1_single_digit_adder.add_digits(5, 5), (0, 1))

    def test_6(self):
        self.assertEqual(
            Ex__1_single_digit_adder.add_digits(9, 1), (0, 1))

    def test_7(self):
        self.assertEqual(
            Ex__1_single_digit_adder.add_digits(9, 9), (8, 1))


if __name__ == '__main__':
    unittest.main()
To execute the tests, assuming Python is installed and is in your path, execute the following command:
python Ex__1_test_single_digit_adder.py
This is the output of a test run:
C:>python Ex__1_test_single_digit_adder.py
.......
----------------------------------------------------------------------
Ran 7 tests in 0.001s
OK
Create arrays of 40 decimal digits each for the addend, accumulator, and carry. Prompt the user for two decimal integers of up to 40 digits each. Perform the addition digit by digit using the cycles described in Exercise 1, and collect the carry output from each digit position in the carry array. After the cycles are complete, insert carries and, where necessary, ripple them across digits to complete the addition operation. Display the results after each cycle and at the end. Test with the same sums as in Exercise 1 and test 99+1, 999999+1, 49+50, and 50+50.
The Ex__2_40_digit_adder.py
Python file contains the adder code:
#!/usr/bin/env python
"""Ex__2_40_digit_adder.py: Answer to Ch 1 Ex 2."""
import sys
import Ex__1_single_digit_adder
# Add two decimal numbers of up to 40 digits and return the
# sum. Input and output numeric values are represented as
# strings.
def add_40_digits(str1, str2, max_digits=40):
    """Add two non-negative decimal integers given as digit strings.

    Each digit position is added with the single-digit adder, the carry out
    of every position is recorded, and the carries are then rippled upward.

    Args:
        str1: First operand as a string of decimal digits.
        str2: Second operand as a string of decimal digits.
        max_digits: Width of the adder in digits (default 40).

    Returns:
        The sum as a string with leading zeros removed ('0' for zero).
        A carry out of the most significant digit is discarded, so the
        result wraps modulo 10**max_digits.

    Raises:
        ValueError: If an operand contains a non-digit character.
        IndexError: If an operand has more than max_digits digits.
    """
    # Convert str1 into a fixed-width digit list, least significant first
    num1 = [0] * max_digits
    for i, c in enumerate(reversed(str1)):
        num1[i] = int(c)

    # Convert str2 into a fixed-width digit list, least significant first
    num2 = [0] * max_digits
    for i, c in enumerate(reversed(str2)):
        num2[i] = int(c)

    # Sum the digits at each position and record the
    # carry for each position
    digits = [0] * max_digits
    carry = [0] * max_digits
    for i in range(max_digits):
        digits[i], carry[i] = Ex__1_single_digit_adder.add_digits(
            num1[i], num2[i])

    # Ripple the carry values across the digits. A digit that wraps to 0
    # when a carry is inserted generates a carry into the next position.
    for i in range(max_digits - 1):
        if carry[i] == 1:
            digits[i + 1] = (digits[i + 1] + 1) % 10
            if digits[i + 1] == 0:
                carry[i + 1] = 1

    # Convert the result into a string with leading zeros
    # removed
    digits.reverse()
    sum_str = "".join(map(str, digits))
    return sum_str.lstrip('0') or '0'
The Ex__2_test_40_digit_adder.py
file contains the test code:
#!/usr/bin/env python
"""Ex__2_test_40_digit_adder.py: Tests for answer to
chapter 1 exercise 2."""
import unittest
import Ex__2_40_digit_adder
class Test40DigitAdder(unittest.TestCase):
    """Check add_40_digits against the sums listed in the exercise."""

    def test_1(self):
        self.assertEqual(
            Ex__2_40_digit_adder.add_40_digits("0", "0"), "0")

    def test_2(self):
        self.assertEqual(
            Ex__2_40_digit_adder.add_40_digits("0", "1"), "1")

    def test_3(self):
        self.assertEqual(
            Ex__2_40_digit_adder.add_40_digits("1", "0"), "1")

    def test_4(self):
        self.assertEqual(
            Ex__2_40_digit_adder.add_40_digits("1", "2"), "3")

    def test_5(self):
        self.assertEqual(
            Ex__2_40_digit_adder.add_40_digits("5", "5"), "10")

    def test_6(self):
        self.assertEqual(
            Ex__2_40_digit_adder.add_40_digits("9", "1"), "10")

    def test_7(self):
        self.assertEqual(
            Ex__2_40_digit_adder.add_40_digits("9", "9"), "18")

    def test_8(self):
        # Carry ripples across two digit positions
        self.assertEqual(
            Ex__2_40_digit_adder.add_40_digits("99", "1"), "100")

    def test_9(self):
        # Carry ripples across six digit positions
        self.assertEqual(
            Ex__2_40_digit_adder.add_40_digits("999999", "1"), "1000000")

    def test_10(self):
        self.assertEqual(
            Ex__2_40_digit_adder.add_40_digits("49", "50"), "99")

    def test_11(self):
        self.assertEqual(
            Ex__2_40_digit_adder.add_40_digits("50", "50"), "100")


if __name__ == '__main__':
    unittest.main()
To execute the tests, assuming Python is installed and is in your path, execute the following command:
python Ex__2_test_40_digit_adder.py
This is the output of a test run:
C:>python Ex__2_test_40_digit_adder.py
...........
----------------------------------------------------------------------
Ran 11 tests in 0.002s
OK
Modify the programs of Exercise 1 and Exercise 2 to implement the subtraction of 40-digit decimal values. Perform borrowing as required. Test with 0-0, 1-0, 1000000-1, and 0-1. What is the result for 0-1?
The Ex__3_single_digit_subtractor.py
Python file contains the single-digit subtractor code:
#!/usr/bin/env python
"""Ex__3_single_digit_subtractor.py: Answer to Ch 1 Ex 3
(single digit subtractor)."""
import sys
# Perform one step of the Analytical Engine subtraction
# operation. b is being subtracted from a (a - b),
# c is the carry: 0 = borrow, 1 = not borrow
def decrement_subtractor(a, b, c):
    """Run one subtract cycle, removing a single unit from both operands.

    Args:
        a: Left operand digit; decremented by one, wrapping from 0 to 9.
        b: Right operand digit; decremented by one.
        c: Carry value; decremented when the left operand wraps to 9.

    Returns:
        The updated (a, b, c) tuple.
    """
    a = (a - 1) % 10     # Decrement left operand, to 9 if wrapped
    b = b - 1            # Decrement right operand
    if a == 9:           # Left operand wrapped from 0 to 9: decrement carry
        c = c - 1
    return a, b, c
# Subtract two decimal digits. The difference is returned as
# digit1 and the carry output is 0 (borrow) or 1 (not borrow).
def subtract_digits(digit1, digit2):
    """Compute digit1 - digit2 using repeated subtract cycles.

    Args:
        digit1: Left operand digit (0-9).
        digit2: Right operand digit (0-9); counted down to zero.

    Returns:
        A (difference_digit, carry) tuple. For inputs in the range 0-9 the
        carry is 1 when no borrow was needed and 0 when the left operand
        wrapped below zero.
    """
    carry = 1
    # One cycle per unit of the right operand.
    while digit2 > 0:
        digit1, digit2, carry = decrement_subtractor(digit1, digit2, carry)
    return digit1, carry
The Ex__3_test_single_digit_subtractor.py
file contains the test code for the single-digit subtractor:
#!/usr/bin/env python
"""Ex__3_test_single_digit_subtractor.py: Tests for answer
to chapter 1 exercise 3 (tests for single digit
subtractor)."""
import unittest
import Ex__3_single_digit_subtractor
class TestSingleDigitSubtractor(unittest.TestCase):
    """Check subtract_digits on single-digit operand pairs.

    Each expected value is a (difference_digit, carry) tuple, where a
    carry of 0 indicates a borrow.
    """

    def test_1(self):
        self.assertEqual(
            Ex__3_single_digit_subtractor.subtract_digits(0, 0), (0, 1))

    def test_2(self):
        # 0 - 1 wraps to 9 and signals a borrow
        self.assertEqual(
            Ex__3_single_digit_subtractor.subtract_digits(0, 1), (9, 0))

    def test_3(self):
        self.assertEqual(
            Ex__3_single_digit_subtractor.subtract_digits(1, 0), (1, 1))

    def test_4(self):
        self.assertEqual(
            Ex__3_single_digit_subtractor.subtract_digits(1, 2), (9, 0))

    def test_5(self):
        self.assertEqual(
            Ex__3_single_digit_subtractor.subtract_digits(5, 5), (0, 1))

    def test_6(self):
        self.assertEqual(
            Ex__3_single_digit_subtractor.subtract_digits(9, 1), (8, 1))

    def test_7(self):
        self.assertEqual(
            Ex__3_single_digit_subtractor.subtract_digits(9, 9), (0, 1))


if __name__ == '__main__':
    unittest.main()
The Ex__3_40_digit_subtractor.py
Python file contains the 40-digit subtractor code:
#!/usr/bin/env python
"""Ex__3_40_digit_subtractor.py: Answer to Ch 1 Ex 3."""
import sys
import Ex__3_single_digit_subtractor
# Subtract two decimal numbers of up to 40 digits and
# return the result. Input and output numeric values are
# represented as strings.
def subtract_40_digits(str1, str2, max_digits=40):
    """Compute str1 - str2 for non-negative decimal integers given as strings.

    Each digit position is subtracted with the single-digit subtractor, the
    carry (borrow indicator) of every position is recorded, and the borrows
    are then rippled upward.

    Args:
        str1: Left operand as a string of decimal digits.
        str2: Right operand as a string of decimal digits.
        max_digits: Width of the subtractor in digits (default 40).

    Returns:
        The difference as a string with leading zeros removed ('0' for
        zero). A borrow out of the most significant digit is discarded, so
        a negative result wraps modulo 10**max_digits (0 - 1 yields
        max_digits nines).

    Raises:
        ValueError: If an operand contains a non-digit character.
        IndexError: If an operand has more than max_digits digits.
    """
    # Convert str1 into a fixed-width digit list, least significant first
    num1 = [0] * max_digits
    for i, c in enumerate(reversed(str1)):
        num1[i] = int(c)

    # Convert str2 into a fixed-width digit list, least significant first
    num2 = [0] * max_digits
    for i, c in enumerate(reversed(str2)):
        num2[i] = int(c)

    # Subtract the digits at each position and record the
    # carry for each position (0 = borrow, 1 = not borrow)
    diff = [0] * max_digits
    carry = [0] * max_digits
    for i in range(max_digits):
        diff[i], carry[i] = Ex__3_single_digit_subtractor.subtract_digits(
            num1[i], num2[i])

    # Ripple the borrows across the digits. A digit that wraps to 9 when a
    # borrow is taken from it must itself borrow from the next position.
    for i in range(max_digits - 1):
        if carry[i] == 0:
            diff[i + 1] = (diff[i + 1] - 1) % 10
            if diff[i + 1] == 9:
                carry[i + 1] = 0

    # Convert the result into a string with leading zeros
    # removed
    diff.reverse()
    diff_str = "".join(map(str, diff))
    return diff_str.lstrip('0') or '0'
The Ex__3_test_40_digit_subtractor.py
file contains the test code for the 40-digit subtractor:
#!/usr/bin/env python
"""Ex__3_test_40_digit_subtractor.py: Tests for answer to
chapter 1 exercise 3."""
import unittest
import Ex__3_40_digit_subtractor
class Test40DigitSubtractor(unittest.TestCase):
    """Check subtract_40_digits against the cases listed in the exercise."""

    def test_1(self):
        self.assertEqual(
            Ex__3_40_digit_subtractor.subtract_40_digits("0", "0"), "0")

    def test_2(self):
        self.assertEqual(
            Ex__3_40_digit_subtractor.subtract_40_digits("1", "0"), "1")

    def test_3(self):
        # Borrow ripples across six digit positions
        self.assertEqual(
            Ex__3_40_digit_subtractor.subtract_40_digits("1000000", "1"),
            "999999")

    def test_4(self):
        # 0 - 1 borrows through every position and wraps to forty 9s
        self.assertEqual(
            Ex__3_40_digit_subtractor.subtract_40_digits("0", "1"),
            "9999999999999999999999999999999999999999")


if __name__ == '__main__':
    unittest.main()
To execute the tests, assuming Python is installed and is in your path, execute the following commands:
python Ex__3_test_single_digit_subtractor.py
python Ex__3_test_40_digit_subtractor.py
This is the output of a test run of Ex__3_test_single_digit_subtractor.py
:
C:>python Ex__3_test_single_digit_subtractor.py
.......
----------------------------------------------------------------------
Ran 7 tests in 0.001s
OK
This is the output of a test run of Ex__3_test_40_digit_subtractor.py
:
C:>python Ex__3_test_40_digit_subtractor.py
....
----------------------------------------------------------------------
Ran 4 tests in 0.001s
OK
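For the single-digit subtractor, the result for 0-1 is 9 with a carry of 0 (indicating a borrow). For the 40-digit subtractor, the borrow ripples through every digit position and off the most significant digit, so the result for 0-1 is 9999999999999999999999999999999999999999 (forty 9s).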
6502 assembly language references data in memory locations using an operand value containing the address (without the # character that indicates an immediate value).
For example, the LDA $00
instruction loads the byte at memory address $00
into A
. STA $01
stores the byte in A
in address $01
. Addresses can be any value in the range 0
to $FFFF
, assuming memory exists at the address and the address is not already in use for some other purpose. Using your preferred 6502 emulator, write 6502 assembly code to store a 16-bit value in addresses $00-$01
, store a second value in addresses $02-$03
, and then add the two values and store the result in $04-$05
. Be sure to propagate any carry between the 2 bytes. Ignore any carry from the 16-bit result. Test with $0000+$0001
, $00FF+$0001
, and $1234+$5678
.
The 6502 assembly file Ex__4_16_bit_addition.asm
contains the 16-bit addition code:
; Ex__4_16_bit_addition.asm
; Try running this code at
; https://skilldrick.github.io/easy6502/
; Adds the 16-bit value in $00-$01 to the 16-bit value in $02-$03 and
; stores the 16-bit sum in $04-$05. Each value is stored low byte first.
; Set up the values to be added
; Remove the appropriate semicolons to select the bytes to add:
; ($0000 + $0001) or ($00FF + $0001) or ($1234 + $5678)
; First operand, low byte -> $00
LDA #$00
;LDA #$FF
;LDA #$34
STA $00
; First operand, high byte -> $01
LDA #$00
;LDA #$00
;LDA #$12
STA $01
; Second operand, low byte -> $02
LDA #$01
;LDA #$01
;LDA #$78
STA $02
; Second operand, high byte -> $03
LDA #$00
;LDA #$00
;LDA #$56
STA $03
; Add the two 16-bit values
; Clear the carry so the low-byte addition starts with no carry in
CLC
LDA $00
ADC $02
STA $04
; The carry out of the low-byte addition feeds the high-byte addition
LDA $01
ADC $03
STA $05
; Any carry out of the high-byte addition is left in the C flag and ignored
Try running this code at https://skilldrick.github.io/easy6502/.
Write 6502 assembly code to subtract two 16-bit values in a manner similar to Exercise 4. Test with $0001-$0000
, $0001-$0001
, $0100-$00FF
, and $0000-$0001
. What is the result for $0000-$0001
?
The 6502 assembly file Ex__5_16_bit_subtraction.asm
contains the 16-bit subtraction code:
; Ex__5_16_bit_subtraction.asm
; Try running this code at
; https://skilldrick.github.io/easy6502/
; Subtracts the 16-bit value in $02-$03 from the 16-bit value in $00-$01
; and stores the 16-bit difference in $04-$05. Each value is stored low
; byte first.
; Set up the values to be subtracted
; Remove the appropriate semicolons to select the bytes to
; subtract:
; ($0001 - $0000) or ($0001 - $0001) or ($0100 - $00FF) or
; ($0000 - $0001)
; Left operand, low byte -> $00
LDA #$01
;LDA #$01
;LDA #$00
;LDA #$00
STA $00
; Left operand, high byte -> $01
LDA #$00
;LDA #$00
;LDA #$01
;LDA #$00
STA $01
; Right operand, low byte -> $02
LDA #$00
;LDA #$01
;LDA #$FF
;LDA #$01
STA $02
; Right operand, high byte -> $03
LDA #$00
;LDA #$00
;LDA #$00
;LDA #$00
STA $03
; Subtract the two 16-bit values
; Set the carry so the low-byte subtraction starts with no borrow
SEC
LDA $00
SBC $02
STA $04
; A borrow from the low-byte subtraction (C clear) feeds the high byte
LDA $01
SBC $03
STA $05
Try running this code at https://skilldrick.github.io/easy6502/.
The result for $0000-$0001
is $FFFF
.
Write 6502 assembly code to store two 32-bit integers in addresses $00-03
and $04-$07
, and then add them, storing the results in $08-$0B
. Use a looping construct, including a label and a branch instruction, to iterate over the bytes of the two values to be added. Search the internet for the details of the 6502 decrement and branch instructions and the use of labels in assembly language. Hint: the 6502 zero-page indexed addressing mode works well in this application.
The 6502 assembly file Ex__6_32_bit_addition.asm
contains the 32-bit addition code:
; Ex__6_32_bit_addition.asm
; Try running this code at
; https://skilldrick.github.io/easy6502/
; Adds the 32-bit value in $00-$03 to the 32-bit value in $04-$07 and
; stores the 32-bit sum in $08-$0B. Each value is stored low byte first.
; Set up the values to be added
; Remove the appropriate semicolons to select the bytes to
; add:
; ($00000001 + $00000001) or ($0000FFFF + $00000001) or
; ($FFFFFFFE + $00000001) or ($FFFFFFFF + $00000001)
; First operand, byte 0 (least significant) -> $00
LDA #$01
;LDA #$FF
;LDA #$FE
;LDA #$FF
STA $00
; First operand, byte 1 -> $01
LDA #$00
;LDA #$FF
;LDA #$FF
;LDA #$FF
STA $01
; First operand, byte 2 -> $02
LDA #$00
;LDA #$00
;LDA #$FF
;LDA #$FF
STA $02
; First operand, byte 3 (most significant) -> $03
LDA #$00
;LDA #$00
;LDA #$FF
;LDA #$FF
STA $03
; Second operand is $00000001 in every test case
LDA #$01
STA $04
LDA #$00
STA $05
STA $06
STA $07
; Add the two 32-bit values using zero-page indexed
; addressing mode
; X is the byte offset (0 to 3); Y counts the remaining bytes (4 down to 0)
LDX #$00
LDY #$04
; Clear the carry once, before the least significant byte is added
CLC
ADD_LOOP:
LDA $00, X
ADC $04, X
STA $08, X
; INX and DEY leave the carry flag unchanged, so the carry out of this
; byte is still available to the ADC for the next byte
INX
DEY
BNE ADD_LOOP
Try running this code at https://skilldrick.github.io/easy6502/.
Rearrange the circuit in Figure 2.5 to convert the AND gate to a NAND gate. Hint: there is no need to add or remove components.
Relocate the R2 resistor and the output signal connection point as follows:
Figure 1: NAND gate circuit
Create a circuit implementation of an OR gate by modifying the circuit in Figure 2.5. Wires, transistors, and resistors can be added as needed.
The OR gate circuit is as follows:
Figure 2: OR gate circuit
Search the internet for free VHDL development software suites that include a simulator. Get one of these suites, set it up, and build any simple demo projects that come with the suite to ensure it is working properly.
Some freely available VHDL development suites are as follows:
Vivado Design Suite will be used for the examples in Chapter 2, Digital Logic, and the following chapters, including installing circuit designs in a low-cost FPGA development board. These steps describe the installation and setup process for Windows 10:
After the installation completes, follow these steps to build an example project:
Using your VHDL toolset, implement the 4-bit adder using the code listings presented in Chapter 2, Digital Logic.
Follow these steps to implement the 4-bit adder:
Arty
in the search field, select Arty A7-35, and then click Next. If Arty does not appear after searching, click Update Board Repositories and then search again.Ex__4_adder4.vhdl
and Ex__4_fulladder.vhdl
, and then click Finish.Add test driver code (search the internet for VHDL testbench to find examples) to your 4-bit adder to drive it through a limited set of input sets and verify that the outputs are correct.
Follow these steps to test the 4-bit adder project created in Exercise 4:
Ex__5_adder4_testbench.vhdl
, and then click Finish.The VHDL file Ex__5_adder4_testbench.vhdl
contains the testbench code:
-- Testbench that drives the ADDER4 component with six fixed input pairs,
-- holding each pair for 10 ns. Outputs are not checked automatically;
-- they are intended to be inspected in the simulator.
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
-- A testbench entity has no ports
entity ADDER4_TESTBENCH is
end entity ADDER4_TESTBENCH;
architecture BEHAVIORAL of ADDER4_TESTBENCH is
-- Declaration of the device under test
component ADDER4 is
port (
A4 : in std_logic_vector(3 downto 0);
B4 : in std_logic_vector(3 downto 0);
SUM4 : out std_logic_vector(3 downto 0);
C_OUT4 : out std_logic
);
end component;
-- Signals connected to the device under test
signal a : std_logic_vector(3 downto 0);
signal b : std_logic_vector(3 downto 0);
signal s : std_logic_vector(3 downto 0);
signal c_out : std_logic;
begin
TESTED_DEVICE : ADDER4
port map (
A4 => a,
B4 => b,
SUM4 => s,
C_OUT4 => c_out
);
-- Stimulus process: apply each input pair, then wait 10 ns
TEST : process
begin
-- 0 + 0
a <= "0000";
b <= "0000";
wait for 10 ns;
-- 6 + 12
a <= "0110";
b <= "1100";
wait for 10 ns;
-- 15 + 12
a <= "1111";
b <= "1100";
wait for 10 ns;
-- 6 + 7
a <= "0110";
b <= "0111";
wait for 10 ns;
-- 6 + 14
a <= "0110";
b <= "1110";
wait for 10 ns;
-- 15 + 15
a <= "1111";
b <= "1111";
-- Suspend the process indefinitely after the final input pair
wait;
end process TEST;
end architecture BEHAVIORAL;
Expand the test driver code and verify that the 4-bit adder produces correct results for all possible combinations of inputs.
Follow these steps to test the 4-bit adder project created in Exercise 4:
ADDER4_TESTBENCH
). Right-click the module name, select Remove File from Project, and then click OK to confirm the removal.Ex__6_adder4_fulltestbench.vhdl
, and then click Finish.xsim.simulate.runtime
to 3000ns
. Click OK.The VHDL file Ex__6_adder4_fulltestbench.vhdl
contains the testbench code:
-- Exhaustive testbench for the ADDER4 component. It applies all 256
-- combinations of two 4-bit inputs, computes the expected sum and carry
-- with numeric_std arithmetic, and drives the 'error' signal to '1' for
-- any combination whose outputs do not match.
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.NUMERIC_STD.ALL;
-- A testbench entity has no ports
entity ADDER4_TESTBENCH is
end entity ADDER4_TESTBENCH;
architecture BEHAVIORAL of ADDER4_TESTBENCH is
-- Declaration of the device under test
component ADDER4 is
port (
A4 : in std_logic_vector(3 downto 0);
B4 : in std_logic_vector(3 downto 0);
SUM4 : out std_logic_vector(3 downto 0);
C_OUT4 : out std_logic
);
end component;
-- Signals connected to the device under test
signal a : std_logic_vector(3 downto 0);
signal b : std_logic_vector(3 downto 0);
signal s : std_logic_vector(3 downto 0);
signal c_out : std_logic;
-- Reference values computed by the testbench
signal expected_sum5 : unsigned(4 downto 0);
signal expected_sum4 : unsigned(3 downto 0);
signal expected_c : std_logic;
-- Result of the most recent comparison: '0' = match, '1' = mismatch
signal error : std_logic;
begin
TESTED_DEVICE : ADDER4
port map (
A4 => a,
B4 => b,
SUM4 => s,
C_OUT4 => c_out
);
TEST : process
begin
-- Test all combinations of two 4-bit addends (256 total tests)
for a_val in 0 to 15 loop
for b_val in 0 to 15 loop
-- Set the inputs to the ADDER4 component
a <= std_logic_vector(to_unsigned(a_val, a'length));
b <= std_logic_vector(to_unsigned(b_val, b'length));
-- Signal assignments take effect only after the process suspends,
-- so wait before reading a and b
wait for 1 ns;
-- Compute the 5-bit sum of the two 4-bit values
-- (each operand is zero-extended to 5 bits to keep the carry)
expected_sum5 <= unsigned('0' & a) + unsigned('0' & b);
wait for 1 ns;
-- Break the sum into a 4-bit output and a carry bit
expected_sum4 <= expected_sum5(3 downto 0);
expected_c <= expected_sum5(4);
wait for 1 ns;
-- The 'error' signal will only go to 1 if an error occurs
-- (it is driven back to '0' by the next combination that matches)
if ((unsigned(s) = unsigned(expected_sum4)) and
(c_out = expected_c)) then
error <= '0';
else
error <= '1';
end if;
-- Each pass through the inner loop takes 10 ns
-- (three 1 ns waits plus this 7 ns wait)
wait for 7 ns;
end loop;
end loop;
-- Suspend the process indefinitely once all combinations are tested
wait;
end process TEST;
end architecture BEHAVIORAL;
Consider the addition of two signed 8-bit numbers (that is, numbers in the range -128 to +127) where one operand is positive and the other is negative. Is there any pair of 8-bit numbers of different signs that, when added together, will exceed the range -128 to +127? This would constitute a signed overflow. Note: we’re only looking at addition here because, as we’ve seen, subtraction in the 6502 architecture is the same as addition with the right operand’s bits inverted.
The range of the positive (or non-negative) numbers is 0 to 127. The range of negative numbers is -128 to -1. It is only necessary to consider the extremes of each of these ranges to cover all possibilities:
Sum |
Result |
0 + -128 |
-128 |
127 + -128 |
-1 |
0 + -1 |
-1 |
127 + -1 |
126 |
In the preceding table, we can see that there is no pair of 8-bit numbers of different signs that, when added together, exceeds the range -128 to +127.
If the answer to Exercise 1 is no, this implies the only way to create a signed overflow is to add two numbers of the same sign. If an overflow occurs, what can you say about the result of performing XOR between the most significant bit of each operand with the most significant bit of the result? In other words, what will be the result of the expressions left(7) XOR result(7)
and right(7) XOR result(7)
? In these expressions, (7)
indicates bit 7, the most significant bit.
Bit 7 is the sign bit. Since overflow can only occur when both operands are of the same sign, left(7)
must equal right(7)
when an overflow occurs.
When overflow occurs, the sign of the result differs from the sign of the two operands. This means result(7)
differs from bit 7 of both of the operands.
Therefore, left(7) XOR result(7) = 1
and right(7) XOR result(7) = 1
whenever overflow occurs.
Review the VHDL listing in the Arithmetic Logic Unit section in Chapter 3, Processor Elements, and determine whether the logic for setting or clearing the V
flag is correct for addition and subtraction operations. Check the results of adding 126+1, 127+1, -127+(-1), and -128+(-1).
The listing of the VHDL implementation of a portion of a 6502-like Arithmetic Logic Unit (ALU) in Chapter 3, Processor Elements, implements the computation of the overflow flag with the following code:
-- Signed overflow: set V only when bit 7 of the result differs from
-- bit 7 of both the left operand and the right operand
if (((LEFT(7) XOR result8(7)) = '1') AND
((right_op(7) XOR result8(7)) = '1')) then -- V flag
V_OUT <= '1';
else
V_OUT <= '0';
end if;
The following table shows the results of this code for the four test cases in the question:
left |
right |
left(7) |
right(7) |
result8(7) |
V_OUT |
Correct? |
126 |
1 |
0 |
0 |
0 |
0 |
Yes |
127 |
1 |
0 |
0 |
1 |
1 |
Yes |
-127 |
-1 |
1 |
1 |
1 |
0 |
Yes |
-128 |
-1 |
1 |
1 |
0 |
1 |
Yes |
The logic for setting or clearing the V
flag is correct for these test cases.
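When transferring blocks of data over an error-prone transmission medium, it is common to use a checksum to determine whether any data bits were lost or corrupted during transmission. The checksum is typically appended to the transferred data record. One checksum algorithm uses these steps: (1) Add all of the data bytes in the record together, retaining only the lowest 8 bits of the sum. (2) The checksum is the two’s complement of the 8-bit sum (invert all the bits, then add 1). (3) Append the checksum byte to the data record.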
After receiving a data block with the appended checksum, the processor can determine whether the checksum is valid by simply adding all of the bytes in the record, including the checksum, together. The checksum is valid if the lowest 8 bits of the sum are zero. Implement this checksum algorithm using 6502 assembly language. The data bytes begin at the memory location stored in addresses $10-$11
and the number of bytes (including the checksum byte) is provided as an input in the X register. Set the A register to 1
if the checksum is valid, and to 0
if it is invalid.
The Ex__4_checksum_alg.asm
file contains the following checksum code:
; Ex__4_checksum_alg.asm
; Try running this code at https://skilldrick.github.io/easy6502/
; Validates a data block with an appended checksum byte by summing all
; bytes modulo 256. On completion A is 1 if the sum is zero (checksum
; valid) and 0 otherwise.
; Set up the array of bytes to be checksummed
LDA #$01
STA $00
LDA #$72
STA $01
LDA #$93
STA $02
LDA #$F4
STA $03
LDA #$06 ; This is the checksum byte
STA $04
; Store the address of the data array in $10-$11
LDA #$00
STA $10
STA $11
; Store the number of bytes in X
LDX #5
; Entering the checksum algorithm
; Move X to Y
TXA
TAY
; Compute the checksum
; Y counts down from the byte count; each pass decrements Y first and then
; adds the byte at offset Y, so offsets (count-1) down to 0 are summed.
; The loop ends when Y reaches zero rather than when it goes negative, so
; every count from 1 to 255 is handled (a count of 0 sums 256 bytes).
LDA #$00
LOOP:
DEY
; Discard any carry so the running sum is kept modulo 256
CLC
ADC ($10), Y
; CPY does not change A; it only sets the flags tested by BNE
CPY #$00
BNE LOOP
CMP #$00
BNE ERROR
; The sum is zero: Checksum is correct
LDA #1
JMP DONE
; The sum is nonzero: Checksum is incorrect
ERROR:
LDA #0
; A contains 1 if checksum is correct, 0 if it is incorrect
DONE:
Make the checksum validation code from Exercise 4 into a labeled subroutine that can be called with a JSR
instruction and that ends with an RTS
instruction.
The Ex__5_checksum_subroutine.asm
file implements the checksum algorithm as a subroutine:
; Ex__5_checksum_subroutine.asm
; Try running this code at https://skilldrick.github.io/easy6502/
; Builds a 5-byte test record at $00-$04, points $10-$11 at it, and calls
; the CALC_CKSUM subroutine with the byte count in X.
; Set up the array of bytes to be checksummed
LDA #$01
STA $00
LDA #$72
STA $01
LDA #$93
STA $02
LDA #$F4
STA $03
LDA #$06 ; This is the checksum byte
STA $04
; Store the address of the data array in $10-$11
; (low byte in $10, high byte in $11; both zero for address $0000)
LDA #$00
STA $10
STA $11
; Store the number of bytes in X
LDX #5
; Call the checksum calculation subroutine
JSR CALC_CKSUM
; Halt execution
BRK
; ==============================================
; Compute the checksum
; Inputs:  $10-$11 = address of the data block
;          X       = number of bytes, including the checksum byte
; Output:  A = 1 if the bytes sum to zero modulo 256, otherwise 0
; Changes: A, Y and the processor flags
CALC_CKSUM:
; Move X to Y
TXA
TAY
; Y counts down from the byte count; each pass decrements Y first and then
; adds the byte at offset Y, so offsets (count-1) down to 0 are summed.
; The loop ends when Y reaches zero rather than when it goes negative, so
; every count from 1 to 255 is handled (a count of 0 sums 256 bytes).
LDA #$00
LOOP:
DEY
; Discard any carry so the running sum is kept modulo 256
CLC
ADC ($10), Y
; CPY does not change A; it only sets the flags tested by BNE
CPY #$00
BNE LOOP
CMP #$00
BNE CKSUM_ERROR
; The sum is zero: Checksum is correct
LDA #1
JMP DONE
; The sum is nonzero: Checksum is incorrect
CKSUM_ERROR:
LDA #0
; A contains 1 if checksum is correct, 0 if it is incorrect
DONE:
RTS
Write and execute a set of tests to verify the correct operation of the checksum testing subroutine you implemented in Exercise 4 and Exercise 5. What is the shortest block of data your code can perform checksum validation upon? What is the longest block?
The Ex__6_checksum_tests.asm
file implements the following checksum test code:
; Ex__6_checksum_tests.asm
; Try running this code at https://skilldrick.github.io/easy6502/
; After tests complete, A=$AA if success, A=$EE if error detected
; Each test writes its bytes starting at $00, loads the byte count into X,
; calls CALC_CKSUM, and compares A with the expected result (1 = checksum
; valid, 0 = invalid). A mismatch jumps to ERROR.
; Store the address of the data array in $10-$11
LDA #$00
STA $10
STA $11
; ==============================================
; Test 1: 1 byte; Checksum: 00 Checksum should pass? Yes
LDA #$00
STA $00
; Store the number of bytes in X
LDX #1
; Call the checksum calculation subroutine
JSR CALC_CKSUM
; Expect A = 1 (valid)
CMP #$01
BEQ TEST2
JMP ERROR
TEST2:
; ==============================================
; Test 2: 1 byte; Checksum: 01 Checksum should pass? No
LDA #$01
STA $00
; Store the number of bytes in X
LDX #1
; Call the checksum calculation subroutine
JSR CALC_CKSUM
; Expect A = 0 (invalid)
CMP #$00
BEQ TEST3
JMP ERROR
TEST3:
; ==============================================
; Test 3: 2 bytes: 00 Checksum: 00 Checksum should pass? Yes
LDA #$00
STA $00
STA $01
; Store the number of bytes in X
LDX #2
; Call the checksum calculation subroutine
JSR CALC_CKSUM
; Expect A = 1 (valid)
CMP #$01
BEQ TEST4
JMP ERROR
TEST4:
; ==============================================
; Test 4: 2 bytes: 00 Checksum: 01 Checksum should pass? No
LDA #$00
STA $00
LDA #$01
STA $01
; Store the number of bytes in X
LDX #2
; Call the checksum calculation subroutine
JSR CALC_CKSUM
; Expect A = 0 (invalid)
CMP #$00
BEQ TEST5
JMP ERROR
TEST5:
; ==============================================
; Test 5: 2 bytes: 01 Checksum: 00 Checksum should pass? No
LDA #$01
STA $00
LDA #$00
STA $01
; Store the number of bytes in X
; (both bytes of the record must be included in the sum)
LDX #2
; Call the checksum calculation subroutine
JSR CALC_CKSUM
; Expect A = 0 (invalid)
CMP #$00
BEQ TEST6
JMP ERROR
TEST6:
; ==============================================
; Test 6: 3 bytes: 00 00 Checksum: 00 Checksum should pass? Yes
LDA #$00
STA $00
STA $01
STA $02
; Store the number of bytes in X
LDX #3
; Call the checksum calculation subroutine
JSR CALC_CKSUM
; Expect A = 1 (valid)
CMP #$01
BEQ TEST7
JMP ERROR
TEST7:
; ==============================================
; Test 7: 3 bytes: 00 00 Checksum: 00 Checksum should pass? Yes
; NOTE(review): this test is identical to Test 6 - confirm whether a
; different data pattern was intended here
LDA #$00
STA $00
STA $01
STA $02
; Store the number of bytes in X
LDX #3
; Call the checksum calculation subroutine
JSR CALC_CKSUM
; Expect A = 1 (valid)
CMP #$01
BEQ TEST8
JMP ERROR
TEST8:
; ==============================================
; Test 8: 3 bytes: 00 00 Checksum: 01 Checksum should pass? No
LDA #$00
STA $00
LDA #$00
STA $01
LDA #$01
STA $02
; Store the number of bytes in X
LDX #3
; Call the checksum calculation subroutine
JSR CALC_CKSUM
; Expect A = 0 (invalid)
CMP #$00
BEQ TEST9
JMP ERROR
TEST9:
; ==============================================
; Test 9: 3 bytes: 00 01 Checksum: FF Checksum should pass? Yes
LDA #$00
STA $00
LDA #$01
STA $01
LDA #$FF
STA $02
; Store the number of bytes in X
LDX #3
; Call the checksum calculation subroutine
JSR CALC_CKSUM
; Expect A = 1 (valid)
CMP #$01
BEQ TEST10
JMP ERROR
TEST10:
; ==============================================
; Test 10: 5 bytes: 01 72 93 F4 Checksum: 06 Checksum should pass? Yes
LDA #$01
STA $00
LDA #$72
STA $01
LDA #$93
STA $02
LDA #$F4
STA $03
LDA #$06 ; This is the checksum byte
STA $04
; Store the number of bytes in X
LDX #5
; Call the checksum calculation subroutine
JSR CALC_CKSUM
; Expect A = 1 (valid); any other value falls through into ERROR
CMP #$01
BEQ PASSED
ERROR:
; ==============================================
; Error occurred; Halt execution with $EE in A
LDA #$EE
BRK
PASSED:
; ==============================================
; All tests passed; Halt execution with $AA in A
LDA #$AA
BRK
; ==============================================
; Compute the checksum
; Inputs:  $10-$11 = address of the data block
;          X       = number of bytes, including the checksum byte
; Output:  A = 1 if the bytes sum to zero modulo 256, otherwise 0
; Changes: A, Y and the processor flags
CALC_CKSUM:
; Move X to Y
TXA
TAY
; Y counts down from the byte count; each pass decrements Y first and then
; adds the byte at offset Y, so offsets (count-1) down to 0 are summed.
; The loop ends when Y reaches zero rather than when it goes negative, so
; every count from 1 to 255 is handled (a count of 0 sums 256 bytes).
LDA #$00
LOOP:
DEY
; Discard any carry so the running sum is kept modulo 256
CLC
ADC ($10), Y
; CPY does not change A; it only sets the flags tested by BNE
CPY #$00
BNE LOOP
CMP #$00
BNE CKSUM_ERROR
; The sum is zero: Checksum is correct
LDA #1
JMP DONE
; The sum is nonzero: Checksum is incorrect
CKSUM_ERROR:
LDA #0
; A contains 1 if checksum is correct, 0 if it is incorrect
DONE:
RTS
The checksum routine works for byte sequences with lengths from 1 to 255 bytes.
Create a circuit implementation of a NAND gate using two CMOS transistor pairs. Unlike NPN transistor gate circuits, no resistors are required for this circuit.
The diagram for this circuit is as follows:
Figure 3: NAND gate circuit
A 16-gigabit DRAM integrated circuit has two bank group selection inputs, two bank selection inputs, and 17 row address inputs. How many bits are in each row of a bank in this device?
The DRAM circuit contains 16 gigabits = $16 \times 2^{30}$ bits.
The number of address bits is 2 bank group bits + 2 bank bits + 17 row address bits = 21 bits.
The row dimension of each bank is therefore $(16 \times 2^{30}) \div 2^{21} = 2^{13} = 8{,}192$ bits.
Restart your computer and enter the BIOS or UEFI settings. Examine each of the menus available in this environment. Does your computer have a BIOS or does it use UEFI? Does your motherboard support overclocking? When you are finished, be sure to select the option to quit without saving changes unless you are absolutely certain you want to make changes.
In Windows, you can enter the BIOS/UEFI settings by changing the startup options while Windows is running. To access these settings, perform the following steps:
startup
and select Change advanced startup options.The following is in response to the questions in this exercise for a specific computer system (an Asus ZenBook UX303LA laptop, in this example):
After you’ve finished examining the UEFI information, exit without saving any changes by following these steps:
Run the appropriate command on your computer to display the currently running processes. What is the process ID (PID) of the process you are using to run this command?
In Windows, open a Command Prompt window (type command
in the Windows search box to locate the application) and then type the tasklist
command as follows:
C:>tasklist
Image Name PID Session Name Session# Mem Usage
=================== ===== ============ ======== ============
System Idle Process 0 Services 0 8 K
System 4 Services 0 9,840 K
Registry 120 Services 0 85,324 K
smss.exe 544 Services 0 640 K
csrss.exe 768 Services 0 4,348 K
wininit.exe 852 Services 0 4,912 K
services.exe 932 Services 0 8,768 K
lsass.exe 324 Services 0 18,160 K
svchost.exe 1044 Services 0 2,308 K
svchost.exe 1068 Services 0 27,364 K
.
.
.
svchost.exe 12184 Services 0 8,544 K
cmd.exe 16008 Console 3 3,996 K
conhost.exe 21712 Console 3 18,448 K
tasklist.exe 15488 Console 3 10,096 K
The current process is the one running the tasklist.exe
application. The PID of this process is 15488
.
Rate monotonic scheduling (RMS) is an algorithm for assigning thread priorities in preemptive, hard, real-time applications in which threads execute periodically. RMS assigns the highest priority to the thread with the shortest execution period, the next-highest priority to the thread with the next-shortest execution period, and so on. An RMS system is schedulable, meaning all tasks are guaranteed to meet their deadlines (assuming no inter-thread interactions or other activities such as interrupts, resulting in processing delays) if the following condition is met:
$$\sum_{i=1}^{n} \frac{C_i}{T_i} \le n\left(2^{1/n} - 1\right)$$
This formula represents the maximum fraction of available processing time that can be consumed by $n$ threads. In this formula, $C_i$ is the maximum execution time required for thread $i$, and $T_i$ is the execution period of thread $i$.
Is the following system composed of three threads schedulable?
Thread |
Execution Time(Ci), ms |
Execution period(Ti), ms |
Thread 1 |
50 |
100 |
Thread 2 |
100 |
500 |
Thread 3 |
120 |
1,000 |
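First, evaluate the left side of the RMS formula using the data from the table:
$$\frac{50}{100} + \frac{100}{500} + \frac{120}{1000} = 0.5 + 0.2 + 0.12 = 0.82$$
Then, evaluate the right side of the RMS formula:
$$3\left(2^{1/3} - 1\right) \approx 0.7798$$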
Because 0.82 is not less than or equal to 0.7798, this set of tasks is not schedulable in RMS.
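A commonly used form of the one-dimensional discrete cosine transform (DCT) is as follows:
$$X_k = \sum_{n=0}^{N-1} x_n \cos\left[\frac{\pi}{N}\left(n + \frac{1}{2}\right)k\right]$$
In this formula, $k$, the index of the DCT coefficient, runs from 0 to $N-1$.
Write a program to compute the DCT of the sequence:
$$x = [0.5,\ 0.2,\ 0.7,\ -0.6,\ 0.4,\ -0.2,\ 1.0,\ -0.3]$$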
The cosine terms in the formula depend only on the indexes n and k and do not depend on the input data sequence x. This means the cosine terms can be computed one time and stored as constants for later use. Using this as a preparatory step, the computation of each DCT coefficient reduces to a sequence of MAC operations.
This formula represents the unoptimized form of the DCT computation, requiring $N^2$ iterations of the MAC operation to compute all $N$ DCT coefficients.
The Ex__2_dct_formula.py
Python file contains the DCT code:
#!/usr/bin/env python
"""Ex__2_dct_formula.py: Answer to chapter 6 exercise 2.

Computes the one-dimensional DCT of a fixed input vector with the direct
(unoptimized) formula X[k] = sum_n x[n] * cos((pi/N) * (n + 1/2) * k) and
prints the input and the resulting coefficients as an aligned table.
"""

# Output produced by this program:
# Index        0       1       2       3       4       5       6       7
# x       0.5000  0.2000  0.7000 -0.6000  0.4000 -0.2000  1.0000 -0.3000
# DCT(x)  1.7000  0.4244  0.6374  0.4941 -1.2021  0.5732 -0.4936  2.3296

import math

# Input vector
x = [0.5, 0.2, 0.7, -0.6, 0.4, -0.2, 1.0, -0.3]

# Precompute the cosine terms. They depend only on the indexes n and k, not
# on the data, so dct_coef[n][k] can be reused for any input of this length.
dct_coef = [
    [math.cos((math.pi / len(x)) * (n + 0.5) * k) for k in range(len(x))]
    for n in range(len(x))
]

# Compute the DCT: each coefficient is a sequence of multiply-accumulate
# operations over the input samples (N*N MACs in total).
x_dct = [
    sum(x[n] * dct_coef[n][k] for n in range(len(x)))
    for k in range(len(x))
]


def _print_row(label, fmt, values):
    """Print one table row: a fixed-width label followed by formatted values.

    Labels are padded to a common width so the columns of all rows line up.
    """
    print(label.ljust(6) + ''.join(fmt % value for value in values))


# Print the results
_print_row('Index', '%8d', range(len(x)))
_print_row('x', '%8.4f', x)
_print_row('DCT(x)', '%8.4f', x_dct)
To run the code, assuming Python is installed and is in your path, execute the following command:
python Ex__2_dct_formula.py
This is the output produced by the program:
C:>Ex__2_dct_formula.py
Index 0 1 2 3 4 5 6 7
x 0.5000 0.2000 0.7000 -0.6000 0.4000 -0.2000 1.0000 -0.3000
DCT(x) 1.7000 0.4244 0.6374 0.4941 -1.2021 0.5732 -0.4936 2.3296
The hyperbolic tangent is often used as an activation function in Artificial Neural Networks (ANNs). The hyperbolic tangent function is:
$$\tanh(x) = \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
Given a neuron with inputs from three preceding neurons as depicted in Figure 6.4, compute the neuron’s output with the hyperbolic tangent as the activation function F(x) using the following neuron outputs and path weights:
| Neuron | Neuron output | Weight |
|---|---|---|
| N1 | 0.6 | 0.4 |
| N2 | -0.3 | 0.8 |
| N3 | 0.5 | -0.2 |
The Ex__3_activation_func.py
Python file contains the following code:
#!/usr/bin/env python
"""Ex__3_activation_func.py: Answer to Ch 6 Ex 3.

Computes the output of a single neuron: the weighted sum of the outputs of
three preceding neurons passed through the hyperbolic tangent activation
function.
"""

# Output produced by this program:
# Neuron output = -0.099668

import math

# Neuron signal and weight vectors (one weight per preceding neuron)
neuron = [0.6, -0.3, 0.5]
weight = [0.4, 0.8, -0.2]

# Weighted sum of the inputs. The result is deliberately not stored in a
# variable called "sum", which would shadow the builtin of the same name.
weighted_sum = sum(signal * w for signal, w in zip(neuron, weight))

# Apply the activation function F(x) = tanh(x)
output = math.tanh(weighted_sum)

# Print the results
print('Neuron output = %8.6f' % output)
To run the code, assuming Python is installed and is in your path, execute the following command:
python Ex__3_activation_func.py
This is the output produced by the program:
C:>Ex__3_activation_func.py
Neuron output = -0.099668
A 16-bit embedded processor has separate memory regions for code and data. Code is stored in flash memory and modifiable data is stored in RAM. Some data values, such as constants and initial values for RAM data items, are stored in the same flash memory region as the program instructions. RAM and ROM reside in the same address space. Which of the processor architectures discussed in Chapter 7, Processor and Memory Architectures, best describes this processor?
Because the code and data are located in the same address space, this is a von Neumann architecture.
The fact that the code and some data items are stored in ROM and other data items reside in RAM is not relevant to determining the architecture category.
The processor described in Exercise 1 has memory security features that prevent executed code from modifying program instruction memory. The processor uses physical addresses to access instructions and data. Does this processor contain an MMU?
While the protection of memory regions is a feature of MMUs, the presence of memory protection alone does not mean an MMU is in use. This processor does not contain an MMU.
MMUs generally perform virtual-to-physical address translation, which does not occur in the processor described here.
The order of accessing sequential elements in a large data structure can have a measurable impact on processing speed due to factors such as the reuse of TLB entries. Accessing distant array elements in sequence (that is, elements that are not in the same page frame as previously accessed elements) requires frequent soft faults as new TLB entries are loaded and old TLB entries are discarded.
Write a program that creates a two-dimensional array of numbers with a large size, such as 10,000 rows by 10,000 columns. Iterate through the array in column-major order, assigning each element the sum of the row and column indices. Column-major means the column index increments fastest. In other words, the column index increments in the inner loop. Measure precisely how long this procedure takes. Note, you may need to take steps to ensure your programming language does not optimize away the entire calculation if the results from the array are not used later. It may suffice to print one of the array values after the timing is complete, or you may need to do something like sum all the array elements and print that result.
Repeat the process, including the timing, exactly as explained before, except change the inner loop to iterate over the row index and the outer loop to iterate over the column index, making the access sequence row-major.
Since general-purpose computers perform many other tasks while running your code, you may need to perform both procedures a number of times to get a statistically valid result. You might start by running the experiment 10 times and averaging the times for column-major and row-major array access.
Are you able to determine a consistently superior array access method? Which order is fastest on your system using the language you selected? Note that the difference between the column-major and row-major access order may not be dramatic — it might be just a few percent.
The Ex__3_row_column_major_order.py
file contains the following Python implementation of a solution to this exercise:
#!/usr/bin/env python
"""Ex__3_row_column_major_order.py: Answer to chapter 7 exercise 3.

Times two traversal orders over a square two-dimensional list and reports
which is faster on average. Following the wording of the exercise,
"column-major" here means the column index is incremented in the inner loop
and "row-major" means the row index is incremented in the inner loop.
"""

# Typical output from a run of this script (timings vary by machine):
# Average row-major time : 16.68 sec
# Average column-major time: 15.94 sec
# Average time difference : 0.74 sec
# Winner is column-major indexing; It is faster by 4.42%

import time


def make_matrix(dim):
    """Return a dim x dim list of lists of zeros with independent rows.

    A comprehension is required here: ``[[0] * dim] * dim`` would repeat a
    reference to one single row list, so every "row" would alias the same
    storage and the experiment would not touch a real 2-D array.
    """
    return [[0] * dim for _ in range(dim)]


def fill_column_major(matrix):
    """Set matrix[i][j] = i + j with the column index j in the inner loop."""
    dim = len(matrix)
    for i in range(dim):
        for j in range(dim):
            matrix[i][j] = i + j


def fill_row_major(matrix):
    """Set matrix[j][i] = i + j with the row index j in the inner loop."""
    dim = len(matrix)
    for i in range(dim):
        for j in range(dim):
            matrix[j][i] = i + j


def _time_fill(fill, matrix):
    """Return the elapsed seconds taken by one call of fill(matrix)."""
    # perf_counter is a monotonic, high-resolution clock intended for
    # measuring intervals; time.time() can jump if the wall clock is adjusted.
    t0 = time.perf_counter()
    fill(matrix)
    return time.perf_counter() - t0


def main(dim=10000, num_passes=10):
    """Run the timing experiment and print per-pass and average results.

    dim is the number of rows and columns of the array; num_passes is the
    number of times each traversal order is timed. Raises ValueError if
    num_passes is less than 1.
    """
    if num_passes < 1:
        raise ValueError('num_passes must be at least 1')

    matrix = make_matrix(dim)
    row_major_time = 0
    col_major_time = 0

    for k in range(num_passes):
        print('Pass %d of %d:' % (k+1, num_passes))

        total_time = _time_fill(fill_column_major, matrix)
        col_major_time = col_major_time + total_time
        print(' Column-major time to fill array: %.2f sec' %
              total_time)

        total_time = _time_fill(fill_row_major, matrix)
        row_major_time = row_major_time + total_time
        print(' Row-major time to fill array: %.2f sec' %
              total_time)
        print('')

    row_major_average = row_major_time / num_passes
    col_major_average = col_major_time / num_passes

    # The improvement is expressed relative to the slower of the two orders.
    if row_major_average < col_major_average:
        winner = 'row'
        faster, slower = row_major_average, col_major_average
    else:
        winner = 'column'
        faster, slower = col_major_average, row_major_average
    # With a tiny array both averages can be 0.0; avoid dividing by zero.
    pct_better = 100 * (slower - faster) / slower if slower > 0 else 0.0

    print('Average row-major time : %.2f sec' % row_major_average)
    print('Average column-major time: %.2f sec' % col_major_average)
    print('Average time difference : %.2f sec' % (
        (row_major_time-col_major_time) / num_passes))
    print(('Winner is ' + winner +
           '-major indexing; It is faster by %.2f%%') % pct_better)


if __name__ == '__main__':
    main()
This program takes a few minutes to run on a Windows PC.
This is the typical output from running this program:
Average row-major time : 16.68 sec
Average column-major time: 15.94 sec
Average time difference : 0.74 sec
Winner is column-major indexing; It is faster by 4.42%
Consider a direct-mapped L1-I cache of 32 KB. Each cache line consists of 64 bytes and the system address space is 4 GB. How many bits are in the cache tag? Which bit numbers (bit 0 is the least significant bit) are they within the address word?
The cache contains 32,768 bytes with 64 bytes in each line. There are 32,768 ÷ 64 = 512 sets in the cache. 512 = $2^9$. The set number is thus 9 bits in length.
Each cache line contains 64 ($2^6$) bytes, which means the lower 6 bits of each address represent the byte offset within the cache line.
A 4 GB address space requires 32-bit addresses. Subtracting the 9 bits in the set number and the 6 bits in the byte offset from the 32-bit address results in 32 – (9 + 6) = 17 bits in the cache tag.
The cache tag lies in the 17 most significant bits of the address, so the range of these bits within a 32-bit address runs from bit 15 to bit 31.
Consider an 8-way set-associative L2 instruction and data cache of 256 KB, with 64 bytes in each cache line. How many sets are in this cache?
The number of lines in the cache is 262,144 ÷ 64 = 4,096.
Each set contains 8 lines.
The number of sets = 4,096 lines ÷ 8 lines per set = 512 sets.
A processor has a 4-stage pipeline with maximum delays of 0.8, 0.4, 0.6, and 0.3 nanoseconds in stages 1-4, respectively. If the first stage is replaced with two stages that have maximum delays of 0.5 and 0.3 nanoseconds, respectively, how much will the processor clock speed increase in percentage terms?
The maximum clock speed is determined by the slowest pipeline stage. The slowest stage of the 4-stage pipeline takes 0.8 ns. The maximum clock frequency is:
1 ÷ (0.8 × $10^{-9}$) = 1.25 GHz
The 5-stage pipeline has a slowest stage of 0.6 ns. The maximum clock frequency is:
1 ÷ (0.6 × $10^{-9}$) = 1.667 GHz
The clock frequency increase resulting from the addition of the pipeline stage is:
100 × (1.667 × $10^{9}$ − 1.25 × $10^{9}$) ÷ (1.25 × $10^{9}$) = 33.3%
Using a programming language that allows access to the byte representation of floating-point data types (such as C or C++), write a function that accepts a 32-bit single-precision variable as input. Extract the sign, exponent, and mantissa from the bytes of the floating-point variable and display them. Remove the bias term from the exponent before displaying its value and display the mantissa as a decimal number. Test the program with the values 0, -0, 1, -1, 6.674e-11, 1.0e38, 1.0e39, 1.0e-38, and 1.0e-39. The numeric values listed here containing e are using the C/C++ text representation of floating-point numbers. For example, 6.674e-11 means 6.674 × $10^{-11}$.
The Ex__1_float_format.cpp
C++ file contains the code for this exercise:
// Ex__1_float_format.cpp
#include <iostream>
#include <cstdint>
// Print one table row describing an IEEE 754 single-precision value: the
// value itself, its raw bytes in hex (most significant byte first), the sign
// bit, the exponent with the bias of 127 removed, and the mantissa as a
// decimal number.
//
// The byte indexing assumes a little-endian host, where bytes[3] holds the
// sign bit and the upper seven exponent bits.
// NOTE(review): an all-ones exponent field (infinity/NaN) gets no special
// treatment and is printed like a normal number.
void print_float(float f)
{
    // View the float's storage as individual bytes
    const auto bytes = static_cast<uint8_t*>(static_cast<void*>(&f));

    printf(" Float | %9g | ", f);

    // Hex dump, highest-addressed byte first
    for (int i = sizeof(float) - 1; i >= 0; i--)
        printf("%02X", bytes[i]);

    printf(" | ");

    // Bit 31 is the sign
    const auto sign = bytes[3] >> 7;

    // Bits 30..23 are the biased exponent: the low 7 bits of bytes[3]
    // followed by the top bit of bytes[2]
    const auto exponent = ((static_cast<uint16_t>(bytes[3] & 0x7F)
        << 8) | bytes[2]) >> 7;
    auto exp_unbiased = exponent - 127;

    // Gather the low three bytes, then keep the 23 fraction bits
    uint32_t mantissa = 0;
    for (auto i = 0; i < 3; i++)
        mantissa = (mantissa << 8) | bytes[2 - i];

    mantissa &= 0x7FFFFF; // Clear upper bit

    double mantissa_dec;
    if (exponent == 0) // This is zero or a subnormal number
    {
        // No implicit leading 1; the effective exponent is -126, not -127
        mantissa_dec = mantissa / static_cast<double>(0x800000);
        exp_unbiased++;
    }
    else
        // Normal number: restore the implicit leading 1 (0x800000 is 2^23)
        mantissa_dec = 1.0 + mantissa / static_cast<double>(0x800000);

    printf(" %d | %4d | %lf\n", sign, exp_unbiased, mantissa_dec);
}
// Print the table header, then one row for each test value of the exercise.
int main(void)
{
    printf(" Type | Number | Bytes | Sign | Exponent | Mantissa\n");
    printf(" -------|-----------|------------------|------|----------|---------\n");

    print_float(0);
    // -0 is the integer 0 negated, which is still integer 0, so the value
    // converted to float is +0.0: the minus sign is ignored
    print_float(-0);
    print_float(1);
    print_float(-1);
    print_float(6.674e-11f);
    print_float(1.0e38f);
    //print_float(1.0e39f); // Compile-time error
    print_float(1.0e-38f);
    print_float(1.0e-39f);

    return 0;
}
This is the output of the program:
Type | Number | Bytes | Sign | Exponent | Mantissa
------|----------|----------|------|----------|---------
Float | 0 | 00000000 | 0 | -126 | 0.000000
Float | 0 | 00000000 | 0 | -126 | 0.000000
Float | 1 | 3F800000 | 0 | 0 | 1.000000
Float | -1 | BF800000 | 1 | 0 | 1.000000
Float | 6.674e-11| 2E92C348 | 0 | -34 | 1.146585
Float | 1e+38 | 7E967699 | 0 | 126 | 1.175494
Float | 1e-38 | 006CE3EE | 0 | -126 | 0.850706
Float | 1e-39 | 000AE398 | 0 | -126 | 0.085071
These are some notes about the results:
- The zero passed to the print_float function in the second row of the table is preceded by a minus sign, but the sign is ignored during the conversion to a floating-point value.
- The value 1.0e39f is not shown because using it causes a compile-time error: the floating constant is out of range.
Modify the program from Exercise 1 to also accept a double-precision, floating-point variable and print the sign, exponent (with the bias removed), and mantissa from the variable. Test with the same input values as in Exercise 1, and also with the values 1.0e308, 1.0e309, 1.0e-308, and 1.0e-309.
The Ex__2_double_format.cpp
C++ file contains the code for this exercise:
// Ex__2_double_format.cpp
#include <iostream>
#include <cstdint>
// Print one table row describing an IEEE 754 single-precision value: the
// value itself, its raw bytes in hex (most significant byte first), the sign
// bit, the exponent with the bias of 127 removed, and the mantissa as a
// decimal number.
//
// The byte indexing assumes a little-endian host, where bytes[3] holds the
// sign bit and the upper seven exponent bits.
// NOTE(review): an all-ones exponent field (infinity/NaN) gets no special
// treatment and is printed like a normal number.
void print_float(float f)
{
    // View the float's storage as individual bytes
    const auto bytes = static_cast<uint8_t*>(static_cast<void*>(&f));

    printf(" Float | %9g | ", f);

    // Hex dump, highest-addressed byte first
    for (int i = sizeof(float) - 1; i >= 0; i--)
        printf("%02X", bytes[i]);

    printf(" | ");

    // Bit 31 is the sign
    const auto sign = bytes[3] >> 7;

    // Bits 30..23 are the biased exponent: the low 7 bits of bytes[3]
    // followed by the top bit of bytes[2]
    const auto exponent = ((static_cast<uint16_t>(bytes[3] & 0x7F) << 8) | bytes[2]) >> 7;
    auto exp_unbiased = exponent - 127;

    // Gather the low three bytes, then keep the 23 fraction bits
    uint32_t mantissa = 0;
    for (auto i = 0; i < 3; i++)
        mantissa = (mantissa << 8) | bytes[2 - i];

    mantissa &= 0x7FFFFF; // Clear upper bit

    double mantissa_dec;
    if (exponent == 0) // This is zero or a subnormal number
    {
        // No implicit leading 1; the effective exponent is -126, not -127
        mantissa_dec = mantissa / static_cast<double>(0x800000);
        exp_unbiased++;
    }
    else
        // Normal number: restore the implicit leading 1 (0x800000 is 2^23)
        mantissa_dec = 1.0 + mantissa / static_cast<double>(0x800000);

    printf(" %d | %4d | %lf\n", sign, exp_unbiased, mantissa_dec);
}
// Print one table row describing an IEEE 754 double-precision value: the
// value itself, its raw bytes in hex (most significant byte first), the sign
// bit, the exponent with the bias of 1023 removed, and the mantissa as a
// decimal number.
//
// The byte indexing assumes a little-endian host, where bytes[7] holds the
// sign bit and the upper seven exponent bits.
// NOTE(review): an all-ones exponent field (infinity/NaN) gets no special
// treatment and is printed like a normal number.
void print_double(double d)
{
    // View the double's storage as individual bytes
    const auto bytes = static_cast<uint8_t*>(static_cast<void*>(&d));

    printf(" Double | %9g | ", d);

    // Hex dump, highest-addressed byte first
    for (int i = sizeof(double) - 1; i >= 0; i--)
        printf("%02X", bytes[i]);

    printf(" | ");

    // Bit 63 is the sign
    const auto sign = bytes[7] >> 7;

    // Bits 62..52 are the biased exponent: the low 7 bits of bytes[7]
    // followed by the top 4 bits of bytes[6]
    const auto exponent = ((static_cast<uint16_t>(bytes[7] & 0x7F) << 8) | bytes[6]) >> 4;
    auto exp_unbiased = exponent - 1023;

    // Gather the low seven bytes, then keep the 52 fraction bits
    uint64_t mantissa = 0;
    for (auto i = 0; i < 7; i++)
        mantissa = (mantissa << 8) | bytes[6 - i];

    mantissa &= 0xFFFFFFFFFFFFF; // Save the low 52 bits

    double mantissa_dec;
    if (exponent == 0) // This is zero or a subnormal number
    {
        // No implicit leading 1; the effective exponent is -1022, not -1023
        mantissa_dec = mantissa / static_cast<double>(0x10000000000000);
        exp_unbiased++;
    }
    else
        // Normal number: restore the implicit leading 1
        // (0x10000000000000 is 2^52)
        mantissa_dec = 1.0 + mantissa / static_cast<double>(0x10000000000000);

    printf(" %d | %5d | %lf\n", sign, exp_unbiased, mantissa_dec);
}
// Print the table header, then one row per test value: first the
// single-precision cases, then the double-precision cases.
int main(void)
{
    printf(" Type | Number | Bytes | Sign | Exponent | Mantissa\n");
    printf(" -------|-----------|------------------|------|----------|---------\n");

    // Single-precision test values
    print_float(0);
    // -0 is the integer 0 negated, which is still integer 0, so the value
    // converted to float is +0.0: the minus sign is ignored
    print_float(-0);
    print_float(1);
    print_float(-1);
    print_float(6.674e-11f);
    print_float(1.0e38f);
    //print_float(1.0e39f); // Compile-time error
    print_float(1.0e-38f);
    print_float(1.0e-39f);

    // Double-precision test values
    print_double(0);
    print_double(-0); // The minus sign is ignored (integer -0 is 0)
    print_double(1);
    print_double(-1);
    print_double(6.674e-11);
    print_double(1.0e38);
    print_double(1.0e39);
    print_double(1.0e-38);
    print_double(1.0e-39);
    print_double(1.0e308);
    //print_double(1.0e309); // Compile-time error
    print_double(1.0e-308);
    print_double(1.0e-309);

    return 0;
}
This is the output of the program:
Type | Number | Bytes | Sign | Exponent | Mantissa
-------|-----------|------------------|------|----------|---------
Float | 0 | 00000000 | 0 | -126 | 0.000000
Float | 0 | 00000000 | 0 | -126 | 0.000000
Float | 1 | 3F800000 | 0 | 0 | 1.000000
Float | -1 | BF800000 | 1 | 0 | 1.000000
Float | 6.674e-11 | 2E92C348 | 0 | -34 | 1.146585
Float | 1e+38 | 7E967699 | 0 | 126 | 1.175494
Float | 1e-38 | 006CE3EE | 0 | -126 | 0.850706
Float | 1e-39 | 000AE398 | 0 | -126 | 0.085071
Double | 0 | 0000000000000000 | 0 | -1022 | 0.000000
Double | 0 | 0000000000000000 | 0 | -1022 | 0.000000
Double | 1 | 3FF0000000000000 | 0 | 0 | 1.000000
Double | -1 | BFF0000000000000 | 1 | 0 | 1.000000
Double | 6.674e-11 | 3DD25868F4DEAE16 | 0 | -34 | 1.146584
Double | 1e+38 | 47D2CED32A16A1B1 | 0 | 126 | 1.175494
Double | 1e+39 | 48078287F49C4A1D | 0 | 129 | 1.469368
Double | 1e-38 | 380B38FB9DAA78E4 | 0 | -127 | 1.701412
Double | 1e-39 | 37D5C72FB1552D83 | 0 | -130 | 1.361129
Double | 1e+308 | 7FE1CCF385EBC8A0 | 0 | 1023 | 1.112537
Double | 1e-308 | 000730D67819E8D2 | 0 | -1022 | 0.449423
Double | 1e-309 | 0000B8157268FDAF | 0 | -1022 | 0.044942
These are some notes about the results:
- The zero passed to the print_double function in the second row of the table containing the Double type is preceded by a minus sign, but the sign is ignored during the conversion to a floating-point value.
- The value 1.0e309 is not shown because using it causes a compile-time error: the floating constant is out of range.
Search the internet for information about the NXP Semiconductors i.MX RT1060 processor family. Download the product family datasheet and answer the following questions about these processors.
Introductory information about the i.MX RT1060 processor family is available at https://www.nxp.com/docs/en/nxp/data-sheets/IMXRT1060CEC.pdf.
The complete i.MX RT1060 reference manual is available only after you create an account at https://www.nxp.com.
While logged in to your account, search for i.MX RT1060 Processor Reference Manual
to locate the reference manual and download it. The filename is IMXRT1060RM.pdf
.
Do the i.MX RT1060 processors support the concept of supervisor-mode instruction execution? Explain your answer.
Performing a search for supervisor
in the i.MX RT1060 processor reference manual produces a few hits. However, all of these uses refer to access restrictions related to a particular subsystem, such as the FlexCAN module.
Supervisor mode in the i.MX RT1060 processor does not operate at the instruction execution level, so these processors do not implement supervisor mode instruction execution as described in Chapter 9, Specialized Processor Extensions.
Do the i.MX RT1060 processors support the concept of paged virtual memory? Explain your answer.
The i.MX RT1060 processors use physical memory addressing with up to 16 memory protection regions. These processors do not support the concept of paged virtual memory.
Do the i.MX RT1060 processors support floating-point operations in hardware? Explain your answer.
Section 1.3, Features, in the reference manual lists the following capability: Single-precision and double-precision FPU (Floating Point Unit).
The ARM Cortex-M7 Processor Technical Reference Manual, available at http://infocenter.arm.com/help/topic/com.arm.doc.ddi0489b/DDI0489B_cortex_m7_trm.pdf, states that the FPU provides “floating-point computation functionality that is compliant with the ANSI/IEEE Std 754-2008, IEEE Standard for Binary Floating-Point Arithmetic, referred to as the IEEE 754 standard.”
The i.MX RT1060 processors support floating-point operations in hardware.
What power management features do the i.MX RT1060 processors support?
Section 12.4 of the reference manual describes the processor power management subsystem. Some of the key features are as follows:
What security features do the i.MX RT1060 processors support?
Chapter 6, Specialized Computing Domains, in the reference manual describes the system security components. Some of the key features are as follows:
Install the free Visual Studio Community edition, available at https://visualstudio.microsoft.com/vs/community/, on a Windows PC. After installation is complete, open the Visual Studio IDE and select Get Tools and Features… under the Tools menu. Install the Desktop development with C++ workload:
In the Windows search box in the taskbar, begin typing Developer Command Prompt for VS 2022. When the app appears in the search menu, select it to open Command Prompt.
Create a file named hello_x86.asm with the content shown in the source listing in the x86 assembly language section of Chapter 10, Modern Processor Architectures and Instruction Sets.
Install Visual Studio Community as described in the question and then install the Desktop development with C++ workload within Visual Studio Community:
Ex__1_hello_x86.asm
file contains the following example solution to this exercise:
; Ex__1_hello_x86.asm: 32-bit x86 MASM program that prints a greeting with
; the C library printf function and then ends by calling the C library exit
; function with status 0.
.386
.model FLAT,C
.stack 400h
.code
includelib libcmt.lib
includelib legacy_stdio_definitions.lib
; C library routines called below
extern printf:near
extern exit:near
public main
main proc
; Print the message
; The address of the zero-terminated string is printf's only argument,
; passed on the stack
push offset message
call printf
; Exit the program with status 0
; The argument pushed for printf is never popped; control is not expected
; to come back from exit
push 0
call exit
main endp
.data
; Text to print, terminated by a zero byte
message db "Hello, Computer Architect!",0
end
ml /Fl /Zi /Zd Ex__1_hello_x86.asm
C:>Ex__1_hello_x86.exe
Hello, Computer Architect!
This is the listing file created by the build procedure:
Microsoft (R) Macro Assembler Version 14.31.31104.0 02/21/22 07:39:20
Ex__1_hello_x86.asm Page 1 - 1
.386
.model FLAT,C
.stack 400h
00000000 .code
includelib libcmt.lib
includelib legacy_stdio_definitions.lib
extern printf:near
extern exit:near
public main
00000000 main proc
; Print the message
00000000 68 00000000 R push offset message
00000005 E8 00000000 E call printf
; Exit the program with status 0
0000000A 6A 00 push 0
0000000C E8 00000000 E call exit
00000011 main endp
00000000 .data
00000000 48 65 6C 6C 6F message db "Hello, Computer Architect!",0
2C 20 43 6F 6D
70 75 74 65 72
20 41 72 63 68
69 74 65 63 74
21 00
end
Microsoft (R) Macro Assembler Version 14.31.31104.0 02/21/22 07:39:20
Ex__1_hello_x86.asm Symbols 2 - 1
Segments and Groups:
N a m e Size Length Align Combine Class
FLAT . . . . . . . . . . . . . . GROUP
STACK . . . . . . . . . . . . . 32 Bit 00000400 DWord Stack 'STACK'
_DATA . . . . . . . . . . . . . 32 Bit 0000001B DWord Public 'DATA'
_TEXT . . . . . . . . . . . . . 32 Bit 00000011 DWord Public 'CODE'
Procedures, parameters, and locals:
N a m e Type Value Attr
main . . . . . . . . . . . . . . P Near 00000000 _TEXT Length= 00000011
Symbols:
N a m e Type Value Attr
@CodeSize . . . . . . . . . . . Number 00000000h
@DataSize . . . . . . . . . . . Number 00000000h
@Interface . . . . . . . . . . . Number 00000001h
@Model . . . . . . . . . . . . . Number 00000007h
@code . . . . . . . . . . . . . Text _TEXT
@data . . . . . . . . . . . . . Text FLAT
@fardata? . . . . . . . . . . . Text FLAT
@fardata . . . . . . . . . . . . Text FLAT
@stack . . . . . . . . . . . . . Text FLAT
exit . . . . . . . . . . . . . . L Near 00000000 FLAT External C
message . . . . . . . . . . . . Byte 00000000 _DATA
printf . . . . . . . . . . . . . L Near 00000000 FLAT External C
0 Warnings
0 Errors
Write an x86 assembly language program that computes the following expression and prints the result as a hexadecimal number: [(129 – 66) × (445 + 136)] ÷ 3. As part of this program, create a callable function to print 1 byte as 2 hex digits.
Ex__2_expr_x86.asm
file contains the following example solution to this exercise:
; Ex__2_expr_x86.asm: 32-bit x86 MASM program that computes
; [(129 - 66) * (445 + 136)] / 3 and prints the 16-bit result as four hex
; digits, using the print_byte procedure once per byte.
.386
.model FLAT,C
.stack 400h
.code
includelib libcmt.lib
includelib legacy_stdio_definitions.lib
; C library routines called below
extern printf:near
extern exit:near
public main
main proc
; Print the leading output string
push offset msg1
call printf
; Compute [(129 – 66) * (445 + 136)] / 3
mov eax, 129
sub eax, 66
mov ebx, 445
add ebx, 136
; 16-bit multiply: dx:ax = ax * bx (63 * 581 = 36603, which fits in ax)
mul bx
mov bx, 3
; 16-bit divide: dx:ax / bx, quotient left in ax
div bx
; Print the most significant byte
; Save the result on the stack first: eax is not relied upon to survive
; the call to print_byte
push eax
mov bl, ah
call print_byte
; Print the least significant byte
; Recover the saved result into ebx; print_byte keeps only the low byte
pop ebx
call print_byte
; Print the trailing output string
push offset msg2
call printf
; Exit the program with status 0
push 0
call exit
main endp
; Pass the byte to be printed in ebx
print_byte proc
; x86 function prologue
push ebp
mov ebp, esp
; Use the C library printf function
; Keep only the low 8 bits of ebx, then push the value and the format
; string (arguments are pushed last-to-first)
and ebx, 0ffh
push ebx
push offset fmt_str
call printf
; x86 function epilogue
; Restoring esp from ebp also discards the two printf arguments
mov esp, ebp
pop ebp
ret
print_byte endp
.data
; Format for one byte as two uppercase hex digits
fmt_str db "%02X", 0
msg1 db "[(129 - 66) * (445 + 136)] / 3 = ", 0
; NOTE(review): msg2 is "h" followed by a tab (9) and has no terminating 0,
; unlike fmt_str and msg1; printf will keep reading until it meets a zero
; byte in memory - confirm whether a trailing ", 0" was intended
msg2 db "h", 9
end
ml /Fl /Zi /Zd Ex__2_expr_x86.asm
C:>Ex__2_expr_x86.exe
[(129 - 66) * (445 + 136)] / 3 = 2FA9h
This is the listing file created by the build procedure:
Microsoft (R) Macro Assembler Version 14.23.28107.0 01/26/20 20:45:09
Ex__2_expr_x86.asm Page 1 - 1
.386
.model FLAT,C
.stack 400h
00000000 .code
includelib libcmt.lib
includelib legacy_stdio_definitions.lib
extern printf:near
extern exit:near
public main
00000000 main proc
; Print the leading output string
00000000 68 00000005 R push offset msg1
00000005 E8 00000000 E call printf
; Compute [(129 – 66) * (445 + 136)] / 3
0000000A B8 00000081 mov eax, 129
0000000F 83 E8 42 sub eax, 66
00000012 BB 000001BD mov ebx, 445
00000017 81 C3 00000088 add ebx, 136
0000001D 66| F7 E3 mul bx
00000020 66| BB 0003 mov bx, 3
00000024 66| F7 F3 div bx
; Print the most significant byte
00000027 50 push eax
00000028 8A DC mov bl, ah
0000002A E8 00000017 call print_byte
; Print the least significant byte
0000002F 5B pop ebx
00000030 E8 00000011 call print_byte
; Print the trailing output string
00000035 68 00000027 R push offset msg2
0000003A E8 00000000 E call printf
0000003F 6A 00 push 0
00000041 E8 00000000 E call exit
00000046 main endp
; Pass the byte to be printed in ebx
00000046 print_byte proc
; x86 function prologue
00000046 55 push ebp
00000047 8B EC mov ebp, esp
; Use the C library printf function
00000049 81 E3 000000FF and ebx, 0ffh
0000004F 53 push ebx
00000050 68 00000000 R push offset fmt_str
00000055 E8 00000000 E call printf
; x86 function epilogue
0000005A 8B E5 mov esp, ebp
0000005C 5D pop ebp
0000005D C3 ret
0000005E print_byte endp
00000000 .data
00000000 25 30 32 58 00 fmt_str db "%02X", 0
00000005 5B 28 31 32 39 msg1 db "[(129 - 66) * (445 + 136)] / 3 = ", 0
20 2D 20 36 36
29 20 2A 20 28
34 34 35 20 2B
20 31 33 36 29
5D 20 2F 20 33
20 3D 20 00
00000027 68 09 msg2 db "h", 9
end
Microsoft (R) Macro Assembler Version 14.23.28107.0 01/26/20 20:45:09
Ex__2_expr_x86.asm Symbols 2 - 1
Segments and Groups:
N a m e Size Length Align Combine Class
FLAT . . . . . . . . . . . . . . GROUP
STACK . . . . . . . . . . . . . 32 Bit 00000400 DWord Stack 'STACK'
_DATA . . . . . . . . . . . . . 32 Bit 00000029 DWord Public 'DATA'
_TEXT . . . . . . . . . . . . . 32 Bit 0000005E DWord Public 'CODE'
Procedures, parameters, and locals:
N a m e Type Value Attr
main . . . . . . . . . . . . . . P Near 00000000 _TEXT Length= 00000046
print_byte . . . . . . . . . . . P Near 00000046 _TEXT Length= 00000018
Symbols:
N a m e Type Value Attr
@CodeSize . . . . . . . . . . . Number 00000000h
@DataSize . . . . . . . . . . . Number 00000000h
@Interface . . . . . . . . . . . Number 00000001h
@Model . . . . . . . . . . . . . Number 00000007h
@code . . . . . . . . . . . . . Text _TEXT
@data . . . . . . . . . . . . . Text FLAT
@fardata? . . . . . . . . . . . Text FLAT
@fardata . . . . . . . . . . . . Text FLAT
@stack . . . . . . . . . . . . . Text FLAT
exit . . . . . . . . . . . . . . L Near 00000000 FLAT External C
fmt_str . . . . . . . . . . . . Byte 00000000 _DATA
msg1 . . . . . . . . . . . . . . Byte 00000005 _DATA
msg2 . . . . . . . . . . . . . . Byte 00000027 _DATA
printf . . . . . . . . . . . . . L Near 00000000 FLAT External C
0 Warnings
0 Errors
In the Windows search box in the taskbar, begin typing x64 Native Tools Command Prompt for VS 2022. When the app appears in the search menu, select it to open Command Prompt.
Create a file named hello_x64.asm with the content shown in the source listing in the x64 assembly language section of Chapter 10, Modern Processor Architectures and Instruction Sets.
The Ex__3_hello_x64.asm file contains the following example solution to this exercise:
; Ex__3_hello_x64.asm: x64 MASM program that prints a greeting with the
; C library printf function and then ends by calling the C library exit
; function with status 0.
.code
includelib libcmt.lib
includelib legacy_stdio_definitions.lib
; C library routines called below
extern printf:near
extern exit:near
public main
main proc
; Reserve stack space
; NOTE(review): presumably 32 bytes of shadow space for the callees plus
; 8 bytes to keep rsp 16-byte aligned, per the Windows x64 calling
; convention - confirm
sub rsp, 40
; Print the message
; rcx carries the first (and only) argument: the string's address
lea rcx, message
call printf
; Exit the program with status 0
; xor of a register with itself yields 0
xor rcx, rcx
call exit
main endp
.data
; Text to print, terminated by a zero byte
message db "Hello, Computer Architect!",0
end
ml64 /Fl /Zi /Zd Ex__3_hello_x64.asm
C:>Ex__3_hello_x64.exe
Hello, Computer Architect!
This is the listing file created by the build procedure:
Microsoft (R) Macro Assembler (x64) Version 14.31.31104.0 02/21/22 07:47:41
Ex__3_hello_x64.asm Page 1 - 1
00000000 .code
includelib libcmt.lib
includelib legacy_stdio_definitions.lib
extern printf:near
extern exit:near
public main
00000000 main proc
; Reserve stack space
00000000 48/ 83 EC 28 sub rsp, 40
; Print the message
00000004 48/ 8D 0D lea rcx, message
00000000 R
0000000B E8 00000000 E call printf
; Exit the program with status 0
00000010 48/ 33 C9 xor rcx, rcx
00000013 E8 00000000 E call exit
00000018 main endp
00000000 .data
00000000 48 65 6C 6C 6F message db "Hello, Computer Architect!",0
2C 20 43 6F 6D
70 75 74 65 72
20 41 72 63 68
69 74 65 63 74
21 00
end
Microsoft (R) Macro Assembler (x64) Version 14.31.31104.0 02/21/22 07:47:41
Ex__3_hello_x64.asm Symbols 2 - 1
Procedures, parameters, and locals:
N a m e Type Value Attr
main . . . . . . . . . . . . . . P 00000000 _TEXT Length= 00000018 Public
Symbols:
N a m e Type Value Attr
exit . . . . . . . . . . . . . . L 00000000 _TEXT External
message . . . . . . . . . . . . Byte 00000000 _DATA
printf . . . . . . . . . . . . . L 00000000 _TEXT External
0 Warnings
0 Errors
Write an x64 assembly language program that computes the following expression and prints the result as a hexadecimal number: [(129 – 66) × (445 + 136)] ÷ 3. As part of this program, create a callable function to print 1 byte as 2 hex digits.
Ex__4_expr_x64.asm
file contains the following example solution to this exercise:
; Ex__4_expr_x64.asm: x64 MASM program that computes
; [(129 - 66) * (445 + 136)] / 3 and prints the 16-bit result as four hex
; digits, using the print_byte procedure once per byte.
.code
includelib libcmt.lib
includelib legacy_stdio_definitions.lib
; C library routines called below
extern printf:near
extern exit:near
public main
main proc
; Reserve stack space
sub rsp, 40
; Print the leading output string
; rcx carries printf's first argument
lea rcx, msg1
call printf
; Compute [(129 – 66) * (445 + 136)] / 3
mov eax, 129
sub eax, 66
mov ebx, 445
add ebx, 136
; 16-bit multiply: dx:ax = ax * bx (63 * 581 = 36603, which fits in ax)
mul bx
mov bx, 3
; 16-bit divide: dx:ax / bx, quotient left in ax
div bx
; Print the most significant byte
; Save the result on the stack first: rax is not relied upon to survive
; the call to print_byte
; NOTE(review): this push moves rsp by 8, so rsp does not appear to be
; 16-byte aligned at the printf call inside the first print_byte call -
; confirm this is acceptable
push rax
mov bl, ah
and ebx, 0ffh
call print_byte
; Print the least significant byte
; Recover the saved result and keep only its low 8 bits
pop rbx
and ebx, 0ffh
call print_byte
; Print the trailing output string
lea rcx, msg2
call printf
; Exit the program with status 0
xor rcx, rcx
call exit
main endp
; Pass the byte to be printed in ebx
print_byte proc
; x64 function prologue
sub rsp, 40
; Use the C library printf function
; rcx = format string (first argument), rdx = value (second argument)
mov rdx, rbx
lea rcx, fmt_str
call printf
; x64 function epilogue
; Release the stack space reserved in the prologue
add rsp, 40
ret
print_byte endp
.data
; Format for one byte as two uppercase hex digits
fmt_str db "%02X", 0
msg1 db "[(129 - 66) * (445 + 136)] / 3 = ", 0
; NOTE(review): msg2 is "h" followed by a tab (9) and has no terminating 0,
; unlike fmt_str and msg1; printf will keep reading until it meets a zero
; byte in memory - confirm whether a trailing ", 0" was intended
msg2 db "h", 9
end
ml64 /Fl /Zi /Zd Ex__4_expr_x64.asm
C:>Ex__4_expr_x64.exe
[(129 - 66) * (445 + 136)] / 3 = 2FA9h
This is the listing file created by the build procedure:
Microsoft (R) Macro Assembler (x64) Version 14.31.31104.0 02/21/22 07:49:37
Ex__4_expr_x64.asm Page 1 - 1
00000000 .code
includelib libcmt.lib
includelib legacy_stdio_definitions.lib
extern printf:near
extern exit:near
public main
00000000 main proc
; Reserve stack space
00000000 48/ 83 EC 28 sub rsp, 40
; Print the leading output string
00000004 48/ 8D 0D lea rcx, msg1
00000005 R
0000000B E8 00000000 E call printf
; Compute [(129 – 66) * (445 + 136)] / 3
00000010 B8 00000081 mov eax, 129
00000015 83 E8 42 sub eax, 66
00000018 BB 000001BD mov ebx, 445
0000001D 81 C3 00000088 add ebx, 136
00000023 66| F7 E3 mul bx
00000026 66| BB 0003 mov bx, 3
0000002A 66| F7 F3 div bx
; Print the most significant byte
0000002D 50 push rax
0000002E 8A DC mov bl, ah
00000030 81 E3 000000FF and ebx, 0ffh
00000036 E8 00000020 call print_byte
; Print the least significant byte
0000003B 5B pop rbx
0000003C 81 E3 000000FF and ebx, 0ffh
00000042 E8 00000014 call print_byte
; Print the trailing output string
00000047 48/ 8D 0D lea rcx, msg2
00000027 R
0000004E E8 00000000 E call printf
; Exit the program with status 0
00000053 48/ 33 C9 xor rcx, rcx
00000056 E8 00000000 E call exit
0000005B main endp
; Pass the byte to be printed in ebx
0000005B print_byte proc
; x64 function prologue
0000005B 48/ 83 EC 28 sub rsp, 40
; Use the C library printf function
0000005F 48/ 8B D3 mov rdx, rbx
00000062 48/ 8D 0D lea rcx, fmt_str
00000000 R
00000069 E8 00000000 E call printf
; x64 function epilogue
0000006E 48/ 83 C4 28 add rsp, 40
00000072 C3 ret
00000073 print_byte endp
00000000 .data
00000000 25 30 32 58 00 fmt_str db "%02X", 0
00000005 5B 28 31 32 39 msg1 db "[(129 - 66) * (445 + 136)] / 3 = ", 0
20 2D 20 36 36
29 20 2A 20 28
34 34 35 20 2B
20 31 33 36 29
5D 20 2F 20 33
20 3D 20 00
00000027 68 09 msg2 db "h", 9
end
Microsoft (R) Macro Assembler (x64) Version 14.31.31104.0 02/21/22 07:49:37
Ex__4_expr_x64.asm Symbols 2 - 1
Procedures, parameters, and locals:
N a m e Type Value Attr
main . . . . . . . . . . . . . . P 00000000 _TEXT Length= 0000005B
print_byte . . . . . . . . . . . P 0000005B _TEXT Length= 00000018
Symbols:
N a m e Type Value Attr
exit . . . . . . . . . . . . . . L 00000000 _TEXT External
fmt_str . . . . . . . . . . . . Byte 00000000 _DATA
msg1 . . . . . . . . . . . . . . Byte 00000005 _DATA
msg2 . . . . . . . . . . . . . . Byte 00000027 _DATA
printf . . . . . . . . . . . . . L 00000000 _TEXT External
0 Warnings
0 Errors
Install the free Android Studio IDE, available at https://developer.android.com/studio/. After installation is complete, open the Android Studio IDE and select SDK Manager under the Tools menu. Select the SDK Tools tab and check the NDK option, which may be called NDK (Side by side). Complete the installation of the NDK (NDK stands for native development kit):
%LOCALAPPDATA%\Android
) and add their directories to your PATH
environment variable: arm-linux-androideabi-as.exe
and adb.exe
. Hint: the following command works for one specific version of Android Studio (your path may vary):
set PATH=%PATH%;%LOCALAPPDATA%\Android\Sdk\ndk\23.0.7599858\toolchains\llvm\prebuilt\windows-x86_64\bin;%LOCALAPPDATA%\Android\Sdk\platform-tools
hello_arm.s
with the content shown in the source listing in the 32-bit ARM assembly language section of Chapter 10, Modern Processor Architectures and Instruction Sets.Ex__5_hello_arm.s
file contains the following example solution to this exercise:
// Ex__5_hello_arm.s: 32-bit ARM program that writes a greeting to stdout
// and exits with status 0, using system calls directly (no C library).
.text
.global _start
_start:
// Print the message to file 1 (stdout) with syscall 4
// r0 = file number, r1 = address of the text, r2 = number of bytes,
// r7 = syscall number
mov r0, #1
ldr r1, =msg
mov r2, #msg_len
mov r7, #4
svc 0
// Exit the program with syscall 1, returning status 0
// r0 = exit status, r7 = syscall number
mov r0, #0
mov r7, #1
svc 0
.data
msg:
// .ascii emits the characters without a terminating zero; the length is
// passed explicitly instead
.ascii "Hello, Computer Architect!"
// Message length, computed by the assembler as the distance from msg to
// the current location
msg_len = . - msg
arm-linux-androideabi-as -al=Ex__5_hello_arm.lst -o Ex__5_hello_arm.o Ex__5_hello_arm.s
arm-linux-androideabi-ld -o Ex__5_hello_arm Ex__5_hello_arm.o
C:>adb devices
* daemon not running; starting now at tcp:5037
* daemon started successfully
List of devices attached
9826f541374f4b4a68 device
C:>adb push Ex__5_hello_arm /data/local/tmp/Ex__5_hello_arm
Ex__5_hello_arm: 1 file pushed. 0.0 MB/s (868 bytes in 0.059s)
C:>adb shell chmod +x /data/local/tmp/Ex__5_hello_arm
C:>adb shell /data/local/tmp/Ex__5_hello_arm
Hello, Computer Architect!
This is the listing file created by the build procedure:
ARM GAS Ex__5_hello_arm.s page 1
1 .text
2 .global _start
3
4 _start:
5 // Print the message to file 1 (stdout) with syscall 4
6 0000 0100A0E3 mov r0, #1
7 0004 14109FE5 ldr r1, =msg
8 0008 1A20A0E3 mov r2, #msg_len
9 000c 0470A0E3 mov r7, #4
10 0010 000000EF svc 0
11
12 // Exit the program with syscall 1, returning status 0
13 0014 0000A0E3 mov r0, #0
14 0018 0170A0E3 mov r7, #1
15 001c 000000EF svc 0
16
17 .data
18 msg:
19 0000 48656C6C .ascii "Hello, Computer Architect!"
19 6F2C2043
19 6F6D7075
19 74657220
19 41726368
20 msg_len = . - msg
Write a 32-bit ARM assembly language program that computes the following expression and prints the result as a hexadecimal number: [(129 – 66) × (445 + 136)] ÷ 3. As part of this program, create a callable function to print 1 byte as 2 hex digits.
Ex__6_expr_arm.s
file contains the following example solution to this exercise:
.text
.global _start
// Program entry point: print "<expression> = ", the 16-bit result as four
// hex digits (upper byte first), a trailing "h", and then exit with status 0.
_start:
// Print the leading output string
ldr r1, =msg1
mov r2, #msg1_len
bl print_string
// Compute [(129 – 66) * (445 + 136)] / 3
// r0 = 129 - 66 = 63
mov r0, #129
sub r0, r0, #66
// r1 = 445 + 136 = 581; 445 is loaded with the "ldr =" form rather than mov
ldr r1, =#445
add r1, r1, #136
// r0 = 63 * 581 = 36603
mul r0, r1, r0
// r0 = 36603 / 3 = 12201 (2FA9h), using unsigned division
mov r1, #3
udiv r0, r0, r1
// Print the upper byte of the result
// The full result is saved on the stack because print_byte overwrites r0
push {r0}
lsr r0, r0, #8
bl print_byte
// Print the lower byte of the result
// print_byte only uses the low 8 bits of r0, so no masking is needed here
pop {r0}
bl print_byte
// Print the trailing output string
ldr r1, =msg2
mov r2, #msg2_len
bl print_string
// Exit the program with syscall 1, returning status 0
mov r0, #0
mov r7, #1
svc 0
// Print a string; r1=string address, r2=string length
// Issues syscall 4 on file 1 (stdout). Overwrites r0 and r7; r1 and r2 are
// left as set by the caller as far as this routine is concerned.
print_string:
mov r0, #1
mov r7, #4
svc 0
// Return to the caller by copying the link register into the program counter
mov pc, lr
// Convert the low 4 bits of r0 to an ascii character in r0
// Values 0-9 become '0'-'9'; values 10-15 become 'A'-'F'.
// Only r0 and the condition flags are modified.
nibble2ascii:
// Discard everything above the low nibble so r0 is in the range 0-15
and r0, #0xF
// cmp computes r0 - 10: the result is negative (mi) when r0 < 10 and
// non-negative (pl) when r0 >= 10
cmp r0, #10
// Exactly one of the two conditional adds executes; neither updates the
// flags, so the first add cannot change the outcome of the second
addpl r0, r0, #('A' - 10)
addmi r0, r0, #'0'
mov pc, lr
// Print a byte in hex
// Input: r0 = value whose low 8 bits are printed as two hex digits.
// The digits are written into the 2-byte "bytes" buffer and then printed
// with print_string. Overwrites r0, r1, r2 and r7.
print_byte:
// Save the return address: the nested bl calls below overwrite lr
push {lr}
// Keep the original value so the low nibble can be converted afterwards
push {r0}
// High nibble first: shift bits 4-7 down into bits 0-3
lsr r0, r0, #4
bl nibble2ascii
ldr r1, =bytes
// Store the first digit, then advance r1 by one (post-indexed addressing)
strb r0, [r1], #1
// Low nibble: nibble2ascii masks off everything above bit 3.
// r1 still points at the second buffer byte because nibble2ascii leaves r1
// untouched
pop {r0}
bl nibble2ascii
strb r0, [r1]
// Print both digits from the start of the buffer
ldr r1, =bytes
mov r2, #2
bl print_string
// Restore the saved return address and return
pop {lr}
mov pc, lr
.data
// Leading text printed before the result; msg1_len is its length in bytes
msg1:
.ascii "[(129 - 66) * (445 + 136)] / 3 = "
msg1_len = . - msg1
// Two-character scratch buffer that print_byte overwrites with hex digits;
// the "??" contents are only placeholders
bytes:
.ascii "??"
// Trailing "h" suffix printed after the hex digits; msg2_len is its length
msg2:
.ascii "h"
msg2_len = . - msg2
arm-linux-androideabi-as -al=Ex__6_expr_arm.lst -o Ex__6_expr_arm.o Ex__6_expr_arm.s
arm-linux-androideabi-ld -o Ex__6_expr_arm Ex__6_expr_arm.o
C:>adb devices
* daemon not running; starting now at tcp:5037
* daemon started successfully
List of devices attached
9826f541374f4b4a68 device
C:>adb push Ex__6_expr_arm /data/local/tmp/Ex__6_expr_arm
Ex__6_expr_arm: 1 file pushed. 0.2 MB/s (1188 bytes in 0.007s)
C:>adb shell chmod +x /data/local/tmp/Ex__6_expr_arm
C:>adb shell /data/local/tmp/Ex__6_expr_arm
[(129 - 66) * (445 + 136)] / 3 = 2FA9h
This is the listing file created by the build procedure:
ARM GAS Ex__6_expr_arm.s page 1
1 .text
2 .global _start
3
4 _start:
5 // Print the leading output string
6 0000 A8109FE5 ldr r1, =msg1
7 0004 2120A0E3 mov r2, #msg1_len
8 0008 110000EB bl print_string
9
10 // Compute [(129 – 66) * (445 + 136)] / 3
11 000c 8100A0E3 mov r0, #129
12 0010 420040E2 sub r0, r0, #66
13 0014 98109FE5 ldr r1, =#445
14 0018 881081E2 add r1, r1, #136
15 001c 910000E0 mul r0, r1, r0
16 0020 0310A0E3 mov r1, #3
17 0024 10F130E7 udiv r0, r0, r1
18
19 // Print the upper byte of the result
20 0028 04002DE5 push {r0}
21 002c 2004A0E1 lsr r0, r0, #8
22 0030 100000EB bl print_byte
23
24 // Print the lower byte of the result
25 0034 04009DE4 pop {r0}
26 0038 0E0000EB bl print_byte
27
28 // Print the trailng output string
29 003c 74109FE5 ldr r1, =msg2
30 0040 0120A0E3 mov r2, #msg2_len
31 0044 020000EB bl print_string
32
33 // Exit the program with syscall 1, returning status 0
34 0048 0000A0E3 mov r0, #0
35 004c 0170A0E3 mov r7, #1
36 0050 000000EF svc 0
37
38 // Print a string; r1=string address, r2=string length
39 print_string:
40 0054 0100A0E3 mov r0, #1
41 0058 0470A0E3 mov r7, #4
42 005c 000000EF svc 0
43 0060 0EF0A0E1 mov pc, lr
44
45 // Convert the low 4 bits of r0 to an ascii character in r0
46 nibble2ascii:
47 0064 0F0000E2 and r0, #0xF
48 0068 0A0050E3 cmp r0, #10
49 006c 37008052 addpl r0, r0, #('A' - 10)
50 0070 30008042 addmi r0, r0, #'0'
51 0074 0EF0A0E1 mov pc, lr
52
53 // Print a byte in hex
54 print_byte:
55 0078 04E02DE5 push {lr}
56 007c 04002DE5 push {r0}
57 0080 2002A0E1 lsr r0, r0, #4
ARM GAS Ex__6_expr_arm.s page 2
58 0084 F6FFFFEB bl nibble2ascii
59 0088 2C109FE5 ldr r1, =bytes
60 008c 0100C1E4 strb r0, [r1], #1
61
62 0090 04009DE4 pop {r0}
63 0094 F2FFFFEB bl nibble2ascii
64 0098 0000C1E5 strb r0, [r1]
65
66 009c 18109FE5 ldr r1, =bytes
67 00a0 0220A0E3 mov r2, #2
68 00a4 EAFFFFEB bl print_string
69
70 00a8 04E09DE4 pop {lr}
71 00ac 0EF0A0E1 mov pc, lr
72
73 .data
74 msg1:
75 0000 5B283132 .ascii "[(129 - 66) * (445 + 136)] / 3 = "
75 39202D20
75 36362920
75 2A202834
75 3435202B
76 msg1_len = . - msg1
77
78 bytes:
79 0021 3F3F .ascii "??"
80
81 msg2:
82 0023 68 .ascii "h"
83 msg2_len = . - msg2
Locate the following files under the Android SDK installation directory (the default location is %LOCALAPPDATA%\Android
) and add their directories to your PATH
environment variable: aarch64-linux-android-as.exe
and adb.exe
. Hint: the following command works for one version of Android Studio (your path may vary):
set PATH=%PATH%;%LOCALAPPDATA%\Android\Sdk\ndk\23.0.7599858\toolchains\llvm\prebuilt\windows-x86_64\bin;%LOCALAPPDATA%\Android\Sdk\platform-tools
hello_arm64.s
with the content shown in the source listing in the 64-bit ARM assembly language section of Chapter 10, Modern Processor Architectures and Instruction Sets.Ex__7_hello_arm64.s
file contains the following example solution to this exercise:
.text
.global _start
// Program entry point: write msg to stdout, then exit with status 0.
// Both operations are requested with "svc 0", with the syscall number in x8.
_start:
// Print the message to file 1 (stdout) with syscall 64
// x0 = file number, x1 = address of the text, x2 = number of bytes to write
mov x0, #1
ldr x1, =msg
mov x2, #msg_len
mov x8, #64
svc 0
// Exit the program with syscall 93, returning status 0
// x0 = exit status
mov x0, #0
mov x8, #93
svc 0
.data
// The message text; its length is computed by the assembler below
msg:
.ascii "Hello, Computer Architect!"
// msg_len = current location minus the address of msg, i.e. the number of
// bytes emitted for the string above
msg_len = . - msg
aarch64-linux-android-as -al=Ex__7_hello_arm64.lst -o Ex__7_hello_arm64.o Ex__7_hello_arm64.s
aarch64-linux-android-ld -o Ex__7_hello_arm64 Ex__7_hello_arm64.o
C:>adb devices
* daemon not running; starting now at tcp:5037
* daemon started successfully
List of devices attached
9826f541374f4b4a68 device
C:>adb push Ex__7_hello_arm64 /data/local/tmp/Ex__7_hello_arm64
Ex__7_hello_arm64: 1 file pushed. 0.0 MB/s (1152 bytes in 0.029s)
C:>adb shell chmod +x /data/local/tmp/Ex__7_hello_arm64
C:>adb shell /data/local/tmp/Ex__7_hello_arm64
Hello, Computer Architect!
This is the listing file created by the build procedure:
AARCH64 GAS Ex__7_hello_arm64.s page 1
1 .text
2 .global _start
3
4 _start:
5 // Print the message to file 1 (stdout) with syscall 64
6 0000 200080D2 mov x0, #1
7 0004 E1000058 ldr x1, =msg
8 0008 420380D2 mov x2, #msg_len
9 000c 080880D2 mov x8, #64
10 0010 010000D4 svc 0
11
12 // Exit the program with syscall 93, returning status 0
13 0014 000080D2 mov x0, #0
14 0018 A80B80D2 mov x8, #93
15 001c 010000D4 svc 0
16
17 .data
18 msg:
19 0000 48656C6C .ascii "Hello, Computer Architect!"
19 6F2C2043
19 6F6D7075
19 74657220
19 41726368
20 msg_len = . - msg
Write a 64-bit ARM assembly language program that computes the following expression and prints the result as a hexadecimal number: [(129 – 66) × (445 + 136)] ÷ 3. As part of this program, create a callable function to print 1 byte as 2 hex digits.
Ex__8_expr_arm64.s
file contains the following example solution to this exercise:
.text
.global _start
// Program entry point: print "<expression> = ", the 16-bit result as four
// hex digits (upper byte first), a trailing "h", and then exit with status 0.
_start:
// Print the leading output string
ldr x1, =msg1
mov x2, #msg1_len
bl print_string
// Compute [(129 – 66) * (445 + 136)] / 3
// x0 = 129 - 66 = 63
mov x0, #129
sub x0, x0, #66
// x1 = 445 + 136 = 581
mov x1, #445
add x1, x1, #136
// x0 = 63 * 581 = 36603
mul x0, x1, x0
// x0 = 36603 / 3 = 12201 (2FA9h), using unsigned division
mov x1, #3
udiv x0, x0, x1
// Print the upper byte of the result
// The full result is kept in x19 because print_byte overwrites x0
mov x19, x0
lsr x0, x0, #8
bl print_byte
// Print the lower byte of the result
// print_byte only uses the low 8 bits of x0, so no masking is needed here
mov x0, x19
bl print_byte
// Print the trailing output string
ldr x1, =msg2
mov x2, #msg2_len
bl print_string
// Exit the program with syscall 93, returning status 0
mov x0, #0
mov x8, #93
svc 0
// Print a string; x1=string address, x2=string length
// Issues syscall 64 on file 1 (stdout). Overwrites x0 and x8; x1 and x2 are
// left as set by the caller as far as this routine is concerned.
print_string:
mov x0, #1
mov x8, #64
svc 0
// Return to the address held in the link register x30
ret x30
// Convert the low 4 bits of x0 to an ascii character in x0
// Values 0-9 become '0'-'9'; values 10-15 become 'A'-'F'.
// Only x0 and the condition flags are modified.
nibble2ascii:
// Discard everything above the low nibble so x0 is in the range 0-15
and x0, x0, #0xF
// cmp computes x0 - 10; the result is negative exactly when x0 < 10
cmp x0, #10
bmi lt10
// x0 is 10-15: map to 'A'-'F'
add x0, x0, #('A' - 10)
b done
lt10:
// x0 is 0-9: map to '0'-'9'
add x0, x0, #'0'
done:
ret x30
// Print a byte in hex
// Input: x0 = value whose low 8 bits are printed as two hex digits.
// The digits are written into the 2-byte "bytes" buffer and then printed
// with print_string. Overwrites x0, x1, x2, x8, x20 and x21 (x20 and x21 are
// used as scratch storage and are not restored).
print_byte:
// Save the return address: the nested bl calls below overwrite x30
mov x21, x30
// Keep the original value so the low nibble can be converted afterwards
mov x20, x0
// High nibble first: shift bits 4-7 down into bits 0-3
lsr x0, x0, #4
bl nibble2ascii
ldr x1, =bytes
// Store the first digit, then advance x1 by one (post-indexed addressing)
strb w0, [x1], #1
// Low nibble: nibble2ascii masks off everything above bit 3.
// x1 still points at the second buffer byte because nibble2ascii leaves x1
// untouched
mov x0, x20
bl nibble2ascii
strb w0, [x1]
// Print both digits from the start of the buffer
ldr x1, =bytes
mov x2, #2
bl print_string
// Restore the saved return address and return
mov x30, x21
ret x30
.data
// Leading text printed before the result; msg1_len is its length in bytes
msg1:
.ascii "[(129 - 66) * (445 + 136)] / 3 = "
msg1_len = . - msg1
// Two-character scratch buffer that print_byte overwrites with hex digits;
// the "??" contents are only placeholders
bytes:
.ascii "??"
// Trailing "h" suffix printed after the hex digits; msg2_len is its length
msg2:
.ascii "h"
msg2_len = . - msg2
aarch64-linux-android-as -al=Ex__8_expr_arm64.lst -o Ex__8_expr_arm64.o Ex__8_expr_arm64.s
aarch64-linux-android-ld -o Ex__8_expr_arm64 Ex__8_expr_arm64.o
C:>adb devices
* daemon not running; starting now at tcp:5037
* daemon started successfully
List of devices attached
9826f541374f4b4a68 device
C:>adb push Ex__8_expr_arm64 /data/local/tmp/Ex__8_expr_arm64
Ex__8_expr_arm64: 1 file pushed. 0.1 MB/s (1592 bytes in 0.015s)
C:>adb shell chmod +x /data/local/tmp/Ex__8_expr_arm64
C:>adb shell /data/local/tmp/Ex__8_expr_arm64
[(129 - 66) * (445 + 136)] / 3 = 2FA9h
This is the listing file created by the build procedure:
AARCH64 GAS Ex__8_expr_arm64.s page 1
1 .text
2 .global _start
3
4 _start:
5 // Print the leading output string
6 0000 C1050058 ldr x1, =msg1
7 0004 220480D2 mov x2, #msg1_len
8 0008 13000094 bl print_string
9
10 // Compute [(129 – 66) * (445 + 136)] / 3
11 000c 201080D2 mov x0, #129
12 0010 000801D1 sub x0, x0, #66
13 0014 A13780D2 mov x1, #445
14 0018 21200291 add x1, x1, #136
15 001c 207C009B mul x0, x1, x0
16 0020 610080D2 mov x1, #3
17 0024 0008C19A udiv x0, x0, x1
18
19 // Print the upper byte of the result
20 0028 F30300AA mov x19, x0
21 002c 00FC48D3 lsr x0, x0, #8
22 0030 14000094 bl print_byte
23
24 // Print the lower byte of the result
25 0034 E00313AA mov x0, x19
26 0038 12000094 bl print_byte
27
28 // Print the trailng output string
29 003c 21040058 ldr x1, =msg2
30 0040 220080D2 mov x2, #msg2_len
31 0044 04000094 bl print_string
32
33 // Exit the program with syscall 93, returning status 0
34 0048 000080D2 mov x0, #0
35 004c A80B80D2 mov x8, #93
36 0050 010000D4 svc 0
37
38 // Print a string; x1=string address, x2=string length
39 print_string:
40 0054 200080D2 mov x0, #1
41 0058 080880D2 mov x8, #64
42 005c 010000D4 svc 0
43 0060 C0035FD6 ret x30
44
45 // Convert the low 4 bits of x0 to an ascii character in x0
46 nibble2ascii:
47 0064 000C4092 and x0, x0, #0xF
48 0068 1F2800F1 cmp x0, #10
49 006c 64000054 bmi lt10
50
51 0070 00DC0091 add x0, x0, #('A' - 10)
52 0074 02000014 b done
53
54 lt10:
55 0078 00C00091 add x0, x0, #'0'
56
57 done:
AARCH64 GAS Ex__8_expr_arm64.s page 2
58 007c C0035FD6 ret x30
59
60 // Print a byte in hex
61 print_byte:
62 0080 F5031EAA mov x21, x30
63 0084 F40300AA mov x20, x0
64 0088 00FC44D3 lsr x0, x0, #4
65 008c F6FFFF97 bl nibble2ascii
66 0090 C1010058 ldr x1, =bytes
67 0094 20140038 strb w0, [x1], #1
68
69 0098 E00314AA mov x0, x20
70 009c F2FFFF97 bl nibble2ascii
71 00a0 20000039 strb w0, [x1]
72
73 00a4 21010058 ldr x1, =bytes
74 00a8 420080D2 mov x2, #2
75 00ac EAFFFF97 bl print_string
76
77 00b0 FE0315AA mov x30, x21
78 00b4 C0035FD6 ret x30
79
80 .data
81 msg1:
82 0000 5B283132 .ascii "[(129 - 66) * (445 + 136)] / 3 = "
82 39202D20
82 36362920
82 2A202834
82 3435202B
83 msg1_len = . - msg1
84
85 bytes:
86 0021 3F3F .ascii "??"
87
88 msg2:
89 0023 68 .ascii "h"
90 msg2_len = . - msg2
Visit https://www.sifive.com/software/ and download Freedom Studio. Freedom Studio is an Eclipse IDE-based development suite with a complete set of tools for building an RISC-V application and running it on a hardware RISC-V processor or in the emulation environment included with Freedom Studio. Follow the instructions in the Freedom Studio User Manual to complete the installation. Start Freedom Studio and create a new Freedom E SDK project. In the project creation dialog, select qemu-sifive-u54
as the target (this is a single-core 64-bit RISC-V processor in the RV64GC configuration). Select the hello
example program and click on the Finish button. This will start a build of the example program and the RISC-V emulator. After the build completes, the Edit Configuration dialog box will appear. Click on Debug to start the program in the emulator debug environment. Single-step through the program and verify that the text Hello, World! appears in the console window.
Install Freedom Studio as described. Note that the directory path for your workspace cannot include spaces. Start Freedom Studio:
With the project from Exercise 1 still open, locate the hello.c
file in the src
folder in the Project window. Right-click on the file and rename it to hello.s
. Open hello.s
in the editor and delete the entire contents. Insert the assembly language program shown in the RISC-V assembly language section in Chapter 11, The RISC-V Architecture and Instruction Set. Perform a cleaning operation and then rebuild the project (press Ctrl + 9 to initiate the cleaning operation). Select Debug under the Run menu. Once the debugger starts, open Windows to display the hello.s
source file, the Disassembly window, and the Registers window. Expand the Registers tree to display the RISC-V processor registers. Single-step through the program and verify that the text Hello, Computer Architect! appears in the console window.
With the project from Exercise 1 still open, locate the hello.c
file in the src
folder in the Project window and then do the following:
hello.s
.hello.s
in the editor and delete the entire contents.Ex__2_riscv_assembly.s
file:
.section .text
.global main
# main: print msg with the C library puts function and return to the caller.
main:
# Reserve stack space and save the return address
# (the call to puts below overwrites ra)
addi sp, sp, -16
sd ra, 0(sp)
# Print the message using the C library puts function
# The auipc/addi pair forms the PC-relative address of msg in a0; %pcrel_lo
# refers back to local label 1, the auipc that supplied the upper bits
1: auipc a0, %pcrel_hi(msg)
addi a0, a0, %pcrel_lo(1b)
jal ra, puts
# Restore the return address and sp, and return to caller
ld ra, 0(sp)
addi sp, sp, 16
# Jump to ra; writing the link address to the zero register discards it
jalr zero, ra, 0
.section .rodata
# NUL-terminated message passed to puts by main. The literal ends with a "\n"
# escape and must stay on one line: a raw line break inside the quotes leaves
# the string unterminated.
msg:
.asciz "Hello, Computer Architect!\n"
hello.s
source file, the Disassembly window, and the Registers window.Registers
tree to display the RISC-V processor registers. Write a RISC-V assembly language program that computes the following expression and prints the result as a hexadecimal number: [(129 – 66) × (445 + 136)] ÷ 3.
As part of this program, create a callable function to print 1 byte as 2 hex digits.
Create a new Freedom Studio project using the same steps as in Exercise 1 in Chapter 11, The RISC-V Architecture and Instruction Set. Locate the hello.c
file in the src
folder in the Project window:
hello.s
.hello.s
file. The Ex__3_riscv_expr.s
file contains the following example solution to this exercise:
.section .text
.global main
# main: print the expression text, the 16-bit result as hex digits (upper
# byte first) and a trailing "h", then return 0 to the caller.
# NOTE(review): all output goes through puts, which per the C standard appends
# a newline after each string, so the pieces presumably appear on separate
# lines - confirm this is the intended output format.
main:
# Reserve stack space and save the return address
# 0(sp) holds ra; 8(sp) is used below to hold the computed result
addi sp, sp, -16
sd ra, 0(sp)
# Print the leading output string
la a0, msg1
jal ra, puts
# Compute [(129 – 66) * (445 + 136)] / 3
# a0 = 129 - 66 = 63
addi a0, zero, 129
addi a0, a0, -66
# a1 = 445 + 136 = 581
addi a1, zero, 445
add a1, a1, 136
# a0 = 63 * 581 = 36603
mul a0, a1, a0
# a0 = 36603 / 3 = 12201 (2FA9h), using unsigned division
addi a1, zero, 3
divu a0, a0, a1
# Print the upper byte of the result
# The result is saved on the stack because print_byte overwrites a0
sw a0, 8(sp)
srl a0, a0, 8
jal ra, print_byte
# Print the lower byte of the result
# print_byte only uses the low 8 bits of a0, so no masking is needed here
lw a0, 8(sp)
jal ra, print_byte
# Print the trailing output string
la a0, msg2
jal ra, puts
# Restore the return address and sp
ld ra, 0(sp)
addi sp, sp, 16
# Set the exit code to zero and return to caller
addi a0, zero, 0
ret
# Convert the low 4 bits of a0 to an ascii character in a0
# Values 0-9 become '0'-'9'; values 10-15 become 'A'-'F'.
# Modifies a0 and t0 only.
nibble2ascii:
# Reserve stack space and save the return address
# (this routine makes no calls itself, so ra is not overwritten here; the
# save/restore simply mirrors the other routines)
addi sp, sp, -16
sd ra, 0(sp)
# Discard everything above the low nibble so a0 is in the range 0-15
and a0, a0, 0xF
# t0 = 1 when a0 < 10 (unsigned compare), otherwise 0
sltu t0, a0, 10
bne t0, zero, lt10
# a0 is 10-15: map to 'A'-'F'
add a0, a0, ('A' - 10)
j done
lt10:
# a0 is 0-9: map to '0'-'9'
add a0, a0, '0'
done:
# Restore the return address and sp, and return to caller
ld ra, 0(sp)
addi sp, sp, 16
ret
# Print a byte in hex
# Input: a0 = value whose low 8 bits are printed as two hex digits.
# The digits are written into the first two bytes of the NUL-terminated
# "bytes" buffer, which is then printed with puts.
print_byte:
# Reserve stack space and save the return address
# (the calls below overwrite ra)
addi sp, sp, -16
sd ra, 0(sp)
# Keep the original value in t1; nibble2ascii changes only a0 and t0, so t1
# and t3 remain valid across the two calls below
addi t1, a0, 0
# High nibble first: shift bits 4-7 down into bits 0-3
srl a0, t1, 4
jal ra, nibble2ascii
la t3, bytes
sb a0, 0(t3)
# Low nibble: nibble2ascii masks off everything above bit 3
addi a0, t1, 0
jal nibble2ascii
sb a0, 1(t3)
# Print both digits; the buffer's terminating NUL ends the string for puts
la a0, bytes
jal ra, puts
# Restore the return address and sp, and return to caller
ld ra, 0(sp)
addi sp, sp, 16
ret
.section .data
# Leading text printed before the result (NUL-terminated for puts)
msg1:
.asciz "[(129 - 66) * (445 + 136)] / 3 = "
# Scratch buffer that print_byte overwrites with two hex digits; the "??"
# contents are only placeholders and .asciz supplies the terminating NUL
bytes:
.asciz "??"
# Trailing "h" suffix printed after the hex digits
msg2:
.asciz "h"
hello.s
source file, the Disassembly window, and the Registers window.Download and install the current version of VirtualBox. Download, install, and bring up Ubuntu Linux as a VM within VirtualBox.
Connect the guest OS to the internet using a bridged network adapter. Configure and enable clipboard sharing and file sharing between the Ubuntu guest and your host operating system.
Perform the following steps:
Ubuntu
, select Linux as the type, and select Ubuntu (64-bit) as the version. Click Next..vdi
file you downloaded and select Open. Click Create to finish creating the VM.share
in your Documents
folder. Click Shared Folders in the VirtualBox Manager Settings dialog for your Ubuntu VM. Click the icon to add a shared folder (it looks like a folder with a plus on it). Select the share
folder you just created on the host computer and click OK.osboxes.org
.sudo apt-get update
sudo apt-get install gcc make perl
sudo apt-get install build-essential linux-headers-$(uname -r) dkms
share
with the following command:
mkdir share
sudo mount -t vboxsf -o rw,uid=1000,gid=1000 share ~/share
cd ~/share
touch file1.txt
file1.txt
is now present in your Documents\share
directory.Within the Ubuntu operating system you installed in Exercise 1, install VirtualBox and then install and bring up a virtual machine version of FreeDOS. Verify that DOS commands such as echo Hello World!
and mem
perform properly in the FreeDOS VM. After completing this exercise, you will have implemented an instance of nested virtualization.
sudo apt-get install virtualbox
sudo apt-get install p7zip-full
~/snap/firefox/common/Downloads
directory, and the FreeDOS image filename is 64bit.7z
):
cd
mkdir 'VirtualBox VMs'
cd 'VirtualBox VMs'
mv ~/snap/firefox/common/Downloads/64bit.7z .
7z x 64bit.7z
virtualbox &
Name: FreeDOS
Type: Other
Version: DOS
32MB RAM
Use an existing virtual hard disk file
~/VirtualBox VMs
and complete the VM configuration.echo Hello World!
mem
dir
This screenshot shows the output of the mem
command:
Figure 4: FreeDOS screenshot
shutdown
Create two separate copies of your Ubuntu guest machine in your host system’s VirtualBox environment. Configure both Ubuntu guests to connect to the VirtualBox internal network. Set up the two machines with compatible IP addresses. Verify that each of the machines can receive a response from the other using the ping
command. By completing this exercise, you have configured a virtual network within your virtualized environment.
Internal
and then click OK. cd "\Program Files\Oracle\VirtualBox"
intnet
VirtualBox network with this command:
VBoxManage dhcpserver add --netname intnet --ip 192.168.10.1 --netmask 255.255.255.0 --lowerip 192.168.10.100 --upperip 192.168.10.199 --enable
192.168.10.100
and 192.168.10.101
.hostname -I
ping 192.168.10.101
You should see a response similar to the following. Press Ctrl + C to stop the updates:
osboxes@osboxes:~$ ping 192.168.10.101
PING 192.168.10.101 (192.168.10.101) 56(84) bytes of data.
64 bytes from 192.168.10.101: icmp_seq=1 ttl=64 time=0.372 ms
64 bytes from 192.168.10.101: icmp_seq=2 ttl=64 time=0.268 ms
64 bytes from 192.168.10.101: icmp_seq=3 ttl=64 time=0.437 ms
64 bytes from 192.168.10.101: icmp_seq=4 ttl=64 time=0.299 ms
^C
--- 192.168.10.101 ping statistics ---
4 packets transmitted, 4 received, 0% packet loss, time 3054ms
rtt min/avg/max/mdev = 0.268/0.344/0.437/0.065 ms
osboxes@osboxes:~$
ping
command on the second machine, switching the target to the IP address of the first machine. Verify that the response is similar to the previous result.Draw a block diagram of the computing architecture for a system to measure and report weather data 24 hours a day at 5-minute intervals using SMS text messages. The system is battery-powered and relies on solar cells to recharge the battery during daylight hours. Assume the weather instrumentation consumes minimal average power, only requiring full power momentarily during each measurement cycle.
Based on the performance requirements, a processor capable of entering a very low power state for minutes at a time should be able to operate from a moderately sized battery for days at a time. By only powering weather sensors when necessary to take a measurement, and only powering the cellular transceiver when it is time to transmit data, power usage is minimized.
The following diagram represents one possible configuration for this system:
Figure 5: Initial weather data collection system diagram
For the system of Exercise 1, identify a suitable, commercially available processor and list the reasons why that processor is a good choice for this application. Some factors to weigh are cost, processing speed, tolerance for harsh environments, power consumption, and integrated features such as RAM and communication interfaces.
Perform the following steps:
low-power microprocessor
brings up a selection of processors from manufacturers including STM, Analog Devices, Texas Instruments, Microchip Technology, and several others.embedded cellular modem
produces a list of cellular modems suitable for this application. Some of these devices are in the form of a system-on-module (SoM), incorporating the RF modem with a programmable processor core in a single module.STM32L471QG
32-bit ARM processor operating at 80 MHz. This processor provides a great deal of capability, including an FPU and dynamic voltage scaling. It is possible to perform extensive preprocessing (filtering, sensor fault detection, and so on) on sensor measurements prior to the transmission of data. The flash and RAM within the device should be more than adequate for the application.Figure 6: Final weather data collection system diagram
Where supported, set up two-factor authentication for all your internet-accessible accounts containing data that you care about. This includes bank accounts, email accounts, social media, code repositories (if you are a software developer), medical services, and anything else you value. Ensure at all stages that you are using only information and software applications from trusted sources.
A comprehensive list of websites and their support (or non-support) for two-factor authentication is available at 2FA Directory (https://2fa.directory/). 2FA is an abbreviation for two-factor authentication.
The most common method for implementing two-factor authentication is for the site to send an SMS text containing a code to the phone number associated with the account after the user enters a valid username and password.
The code is often a 6-digit number that the user must provide to the website to complete the login process. The two factors used for authentication are the user’s knowledge of the account password and the demonstrated access to the phone associated with the account.
Some sites support an app such as the Duo Mobile app (https://duo.com/product/multi-factor-authentication-mfa/duo-mobile-app) for two-factor authentication. When accessing a site that uses the app, after entering username and password information, a notification will appear on your phone. With a single tap, you can approve access and finish logging in.
Create strong passwords for all your internet-accessible accounts containing information of value that cannot be protected by two-factor authentication. A strong password is long (15 characters or more) and includes uppercase, lowercase, numeric, and special characters (for example: ! “ # $ % & ‘ ( ) * +). To keep track of these complicated passwords, install and use a reputable password-safe application. Take care when selecting a password safe and consider its source.
There are many options in terms of securely storing passwords for use on your computer and on other devices. Most web browsers offer password management, as do most antivirus software packages. Standalone password manager applications are available as well. You can start to narrow your choices by performing an internet search for password manager.
When a site asks you to set a password, you can have the password manager generate a long random-looking string of characters as your new password. You won’t need to remember the password because it will be stored securely by the password manager.
When selecting a password management solution, you should consider the need to maintain current passwords on all your devices. When you change the password for a site, you do not want to have to update the new password in several places. A browser-based password manager such as Firefox (https://www.mozilla.org/en-US/) will take care of this for you as long as you have a Firefox account and you have logged in to it on each device.
Update the operating system and other applications and services (such as Java) on all computers and other devices under your control. This will ensure that the security updates included in those updates start working to protect you soon after they become available. Set up a plan to continue regularly installing updates as they are released to ensure you are protected in the future.
Visit the blockchain explorer at https://bitaps.com and locate the list of last blocks on that page. Click on a block number and you will be presented with a display containing the hexadecimal listing of the block header along with its SHA-256 hash. Copy both items and write a program to determine if the hash provided is the correct hash of the header. Remember to perform SHA-256 twice to compute the header hash.
The Python file Ex__1_compute_block_hash.py
contains the block header hashing code:
#!/usr/bin/env python
"""Ex__1_compute_block_hash.py: Answer to Ch 15 Ex 1.

Verify that the hash published for a Bitcoin block header is the double
SHA-256 hash of that header, and print whether the two values match.
"""

# This is a solution for Bitcoin block 711735
# See https://bitaps.com/711735

import binascii
import hashlib

# The block header copied from bitaps.com. Adjacent string literals inside
# the parentheses are joined by Python, so no continuation characters or '+'
# operators are needed.
header = (
    '00000020505424e0dc22a7fb1598d3a048a31957315f'
    '737ec0d00b0000000000000000005f7fbc00ac45edd1f6ca7'
    '713f2b048d8a771c95e1afd9140d3a147a063f64a76781ea4'
    '61139a0c17f666fc1afdbc08'
)

# The hash of the header copied from bitaps.com
header_hash = (
    '00000000000000000000bc01913c2e05a5d38d39a9df0c8ba'
    '4269abe9777f41f'
)

# Cut off any extra bytes beyond the 80-byte header
# (80 bytes are 160 hexadecimal characters)
header = header[:160]

# Convert the header to binary
header = binascii.unhexlify(header)

# Compute the header hash (perform SHA-256 twice)
computed_hash = hashlib.sha256(header).digest()
computed_hash = hashlib.sha256(computed_hash).digest()

# Reverse the byte order so the digest can be compared with the published
# hash string
computed_hash = computed_hash[::-1]

# Convert the binary header hash to a hexadecimal string
computed_hash = binascii.hexlify(computed_hash).decode("utf-8")

# Print the result
print('Header hash: ' + header_hash)
print('Computed hash: ' + computed_hash)

if header_hash == computed_hash:
    result = 'Hashes match!'
else:
    result = 'Hashes DO NOT match!'

print(result)
To execute the program, assuming Python is installed and is in your path, execute the command python Ex__1_compute_block_hash.py
.
This is the output of a test run:
C:>python Ex__1_compute_block_hash.py
Header hash: 00000000000000000000bc01913c2e05a5d38d39a9df0c8ba4269abe9777f41f
Computed hash: 00000000000000000000bc01913c2e05a5d38d39a9df0c8ba4269abe9777f41f
Hashes match!
Set up a full bitcoin peer node and connect it to the bitcoin network. Download the bitcoin core software from https://bitcoin.org/en/download. It is best to have a fast internet connection and at least 200 GB of free disk space.
If you do not already have Python installed on your computer, visit https://www.python.org/downloads/ and install the current version. Ensure Python is in your search path by typing python --version
at a system command prompt. You should receive a response similar to Python 3.10.3. Install TensorFlow (an open source platform for machine learning) with the command (also at the system command prompt) pip install tensorflow
. You may need to use the Run as administrator option when opening the command prompt to get a successful installation. Install Matplotlib (a library for visualizing data) with the command pip install matplotlib
.
The Windows batch file Ex__1_install_tensorflow.bat
contains the commands to install TensorFlow and Matplotlib:
REM Ex__1_install_tensorflow.bat: Answer to Ch 16 Ex 1.
REM This batch file installs TensorFlow and Matplotlib in Windows.
REM Python must be installed (see https://www.python.org/downloads/).
REM The Python installation directory must be in the system path.
python --version
pip install tensorflow
pip install matplotlib
To run the batch file, assuming Python is installed and is in your path, open an Administrator command prompt and execute the command Ex__1_install_tensorflow.bat
.
Create a program using the TensorFlow library that loads the CIFAR-10 dataset and displays a subset of the images along with the label associated with each image. This dataset is a product of the Canadian Institute for Advanced Research (CIFAR) and contains 60,000 images, each consisting of 32x32 RGB pixels. The images have been randomly separated into a training set containing 50,000 images and a test set of 10,000 images. Each image has been labeled by humans as representing an item in one of 10 categories: airplane, automobile, bird, cat, deer, dog, frog, horse, ship, or truck. For more information on the CIFAR-10 dataset, see the technical report by Alex Krizhevsky at https://www.cs.toronto.edu/~kriz/learning-features-2009-TR.pdf.
The Python file Ex__2_load_dataset.py
contains the code to load the dataset and display a subset of the images:
#!/usr/bin/env python
"""Ex__2_load_dataset.py: Answer to Ch 16 Ex 2."""
from tensorflow.keras import datasets
import matplotlib.pyplot as plt
def load_dataset():
    """Load the CIFAR-10 dataset and scale its pixel values.

    Returns:
        A 4-tuple ``(train_images, train_labels, test_images, test_labels)``
        as returned by ``datasets.cifar10.load_data()``, with both image
        arrays divided by 255.0.
    """
    # Parentheses keep the nested unpacking a single logical statement
    (
        (train_images, train_labels),
        (test_images, test_labels),
    ) = datasets.cifar10.load_data()

    # Normalize pixel values to the range 0-1
    train_images = train_images / 255.0
    test_images = test_images / 255.0

    return train_images, train_labels, test_images, test_labels
def plot_samples(train_images, train_labels):
    """Display the first 60 training images in a 5 x 12 grid.

    Each image is captioned with the class name selected by its label.

    Args:
        train_images: Indexable collection of images accepted by
            ``plt.imshow``; at least 60 entries are read.
        train_labels: Indexable collection in which ``train_labels[i][0]``
            is the class index (0-9) of image ``i``.
    """
    class_names = ['Airplane', 'Automobile', 'Bird',
                   'Cat', 'Deer', 'Dog', 'Frog',
                   'Horse', 'Ship', 'Truck']

    plt.figure(figsize=(14, 7))
    for i in range(60):
        # Subplot positions are 1-based
        plt.subplot(5, 12, i + 1)
        # Hide the axis ticks; only the image and its caption are shown
        plt.xticks([])
        plt.yticks([])
        plt.imshow(train_images[i])
        plt.xlabel(class_names[train_labels[i][0]])

    # Show the figure once, after all 60 subplots have been drawn
    plt.show()
# Script entry point: load the dataset and display the sample image grid
if __name__ == '__main__':
    # All four return values are unpacked in one statement; the test split is
    # not used by this script
    train_images, train_labels, test_images, test_labels = load_dataset()
    plot_samples(train_images, train_labels)
To execute the program, assuming Python is installed and is in your path, execute the command:
python Ex__2_load_dataset.py
.
If you receive an error message stating cudart64_110.dll not found
, you can safely ignore the message. This just means you do not have the library installed for running TensorFlow on an Nvidia CUDA GPU. The code will run (more slowly) on your system processor instead.
This is the set of sample images displayed by the code:
Figure 7: Sample CIFAR dataset images
Create a program using the TensorFlow library that builds a CNN using the structure shown in Figure 16.1. Use a 3x3 convolution filter in each convolutional layer. Use 32 filters in the first convolutional layer and 64 filters in the other two convolutional layers. Use 64 neurons in the hidden layer. Provide 10 output neurons representing an image’s presence in one of the 10 CIFAR-10 categories.
This is the CNN structure of Figure 16.1:
Figure 8: CNN structure for image classification
The Python file Ex__3_create_network.py
contains the code to create the CNN model:
#!/usr/bin/env python
"""Ex__3_create_network.py: Answer to Ch 16 Ex 3."""
from tensorflow.keras import datasets, layers, models, optimizers, losses
def load_dataset():
    """Load the CIFAR-10 dataset and scale its pixel values.

    Returns:
        A 4-tuple (train_images, train_labels, test_images,
        test_labels). Both image arrays are divided by 255.0; the
        label arrays are returned exactly as loaded.
    """
    # load_data() yields a (training, test) pair, each of which is an
    # (images, labels) pair. Unpacking in two steps keeps every
    # statement on one logical line, so no continuation characters
    # are required.
    train_set, test_set = datasets.cifar10.load_data()
    train_images, train_labels = train_set
    test_images, test_labels = test_set

    # Normalize pixel values to the range 0-1
    train_images = train_images / 255.0
    test_images = test_images / 255.0

    return train_images, train_labels, test_images, test_labels
def create_model():
    """Build and compile the CNN that classifies the dataset images.

    The network consists of three 3x3 convolutional layers with 32,
    64 and 64 filters. The first two are each followed by 2x2 max
    pooling and the third by a flatten step. A 64-neuron dense layer
    and a 10-neuron output layer complete the model.

    Returns:
        The compiled Sequential model.
    """
    # Input geometry: 32x32 pixels with 3 RGB color planes
    input_dims = (32, 32, 3)
    # Kernel size of every convolutional filter, in pixels
    kernel = (3, 3)
    # Region over which max pooling is performed, in pixels
    pool_window = (2, 2)
    # Filter counts for the first, second and third conv layers
    conv1_filters, conv2_filters, conv3_filters = 32, 64, 64
    # Neuron counts for the hidden and output dense layers
    hidden_width, output_width = 64, 10

    network = models.Sequential()

    # First convolutional layer followed by max pooling
    network.add(layers.Conv2D(conv1_filters, kernel, activation='relu',
                              input_shape=input_dims))
    network.add(layers.MaxPooling2D(pool_window))

    # Second convolutional layer followed by max pooling
    network.add(layers.Conv2D(conv2_filters, kernel, activation='relu'))
    network.add(layers.MaxPooling2D(pool_window))

    # Third convolutional layer followed by flattening
    network.add(layers.Conv2D(conv3_filters, kernel, activation='relu'))
    network.add(layers.Flatten())

    # Dense layer followed by the output layer. The output layer has
    # no activation, which is why the loss below is configured with
    # from_logits=True.
    network.add(layers.Dense(hidden_width, activation='relu'))
    network.add(layers.Dense(output_width))

    network.compile(
        optimizer=optimizers.Adam(),
        loss=losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=['accuracy'])

    return network
if __name__ == '__main__':
    # The dataset is loaded as in the original listing, although this
    # script only builds the model and prints its structure.
    train_images, train_labels, test_images, test_labels = load_dataset()
    model = create_model()
    model.summary()
To execute the program, assuming Python is installed and is in your path, execute the command python Ex__3_create_network.py.
Note: You can ignore any warning messages about not having a GPU present if your system doesn’t have one. The code will execute on the system processor if a GPU is not configured for use with TensorFlow.
This is the output of a test run:
C:>Ex__3_create_network.py
2021-12-12 19:26:07.938984: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX AVX2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-12-12 19:26:08.282366: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/
device:GPU:0 with 3617 MB memory: -> device: 0, name: Quadro P2200, pci bus id: 0000:01:00.0, compute capability: 6.1
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 30, 30, 32) 896
max_pooling2d (MaxPooling2D (None, 15, 15, 32) 0
)
conv2d_1 (Conv2D) (None, 13, 13, 64) 18496
max_pooling2d_1 (MaxPooling (None, 6, 6, 64) 0
2D)
conv2d_2 (Conv2D) (None, 4, 4, 64) 36928
flatten (Flatten) (None, 1024) 0
dense (Dense) (None, 64) 65600
dense_1 (Dense) (None, 10) 650
=================================================================
Total params: 122,570
Trainable params: 122,570
Non-trainable params: 0
_________________________________________________________________
C:>
Create a program using the TensorFlow library that trains the CNN developed in Exercise 3 and test the resulting model using the CIFAR-10 test images. Determine the percentage of test images that the CNN classifies correctly.
The Python file Ex__4_train_model.py contains the code to create, train, and test the CNN model:
#!/usr/bin/env python
"""Ex__4_train_model.py: Answer to Ch 16 Ex 4."""
from tensorflow.keras import datasets, layers, models, optimizers, losses
import matplotlib.pyplot as plt
def load_dataset():
    """Load the CIFAR-10 dataset and scale its pixel values.

    Returns:
        A 4-tuple (train_images, train_labels, test_images,
        test_labels). Both image arrays are divided by 255.0; the
        label arrays are returned exactly as loaded.
    """
    # load_data() yields a (training, test) pair, each of which is an
    # (images, labels) pair. Unpacking in two steps keeps every
    # statement on one logical line, so no continuation characters
    # are required.
    train_set, test_set = datasets.cifar10.load_data()
    train_images, train_labels = train_set
    test_images, test_labels = test_set

    # Normalize pixel values to the range 0-1
    train_images = train_images / 255.0
    test_images = test_images / 255.0

    return train_images, train_labels, test_images, test_labels
def create_model():
    """Build and compile the CNN that classifies the dataset images.

    Layer order: convolution (32 filters), max pooling, convolution
    (64 filters), max pooling, convolution (64 filters), flatten,
    dense (64 neurons), dense (10 neurons). Every convolution uses a
    3x3 kernel and every pooling step a 2x2 region.

    Returns:
        The compiled Sequential model.
    """
    # Each image is 32x32 pixels with 3 RGB color planes
    rgb_image_dims = (32, 32, 3)
    # Shared sizes of the convolution kernels and pooling regions
    kernel_dims = (3, 3)
    pool_dims = (2, 2)

    layer_stack = [
        # First convolutional layer followed by max pooling
        layers.Conv2D(32, kernel_dims, activation='relu',
                      input_shape=rgb_image_dims),
        layers.MaxPooling2D(pool_dims),
        # Second convolutional layer followed by max pooling
        layers.Conv2D(64, kernel_dims, activation='relu'),
        layers.MaxPooling2D(pool_dims),
        # Third convolutional layer followed by flattening
        layers.Conv2D(64, kernel_dims, activation='relu'),
        layers.Flatten(),
        # Hidden dense layer, then the output layer. The output layer
        # has no activation, matching from_logits=True in the loss.
        layers.Dense(64, activation='relu'),
        layers.Dense(10),
    ]
    cnn = models.Sequential(layer_stack)

    adam = optimizers.Adam()
    logits_loss = losses.SparseCategoricalCrossentropy(from_logits=True)
    cnn.compile(optimizer=adam, loss=logits_loss, metrics=['accuracy'])

    return cnn
def train_model(train_images, train_labels,
                test_images, test_labels, model, epochs=10):
    """Train the model, then measure its accuracy on the test set.

    Args:
        train_images: Images passed to model.fit() as training input.
        train_labels: Labels passed to model.fit() as training targets.
        test_images: Images used both as validation data during
            training and for the final evaluation.
        test_labels: Labels corresponding to test_images.
        model: Object providing fit() and evaluate() methods, such as
            the model returned by create_model().
        epochs: Number of training epochs. Defaults to 10.

    Returns:
        A (history, test_acc) tuple: the value returned by
        model.fit() and the second of the two values returned by
        model.evaluate().
    """
    history = model.fit(train_images, train_labels, epochs=epochs,
                        validation_data=(test_images, test_labels))

    # evaluate() returns a pair; only the second element (the
    # accuracy) is needed, so the first is discarded.
    _, test_acc = model.evaluate(test_images, test_labels, verbose=2)

    return history, test_acc
def plot_model_accuracy(history):
    """Plot training and validation accuracy against the epoch.

    Args:
        history: Object whose history attribute maps 'accuracy' and
            'val_accuracy' to sequences of values, such as the first
            value returned by train_model().
    """
    plt.figure()

    # One curve per metric: (key in history.history, legend label)
    curves = (('accuracy', 'Accuracy'),
              ('val_accuracy', 'Validation Accuracy'))
    for metric_key, legend_label in curves:
        plt.plot(history.history[metric_key], label=legend_label)

    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    # Restrict the vertical axis to the range 0.5 to 1
    plt.ylim([0.5, 1])
    plt.legend(loc='upper left')
    plt.grid()
    plt.show()
if __name__ == '__main__':
    # Load the data, build the CNN, then train and evaluate it
    train_images, train_labels, test_images, test_labels = load_dataset()
    model = create_model()
    history, test_acc = train_model(train_images,
        train_labels, test_images, test_labels, model)

    # Report the test-set accuracy as a percentage inside a banner.
    # The banner is 31 characters wide, matching the message line
    # when the percentage has two integer digits.
    print()
    print('='*31)
    print('| Validation accuracy: {:.2f}% |'.
        format(100*test_acc))
    print('='*31)

    plot_model_accuracy(history)
To execute the program, assuming Python is installed and is in your path, execute the command python Ex__4_train_model.py.
Note: You can ignore any warning messages about not having a GPU present if your system doesn’t have one. The code will execute on the system processor if a GPU is not configured for use with TensorFlow.
Your results should indicate an accuracy of approximately 70%. For such a simple CNN, this is a tremendous improvement over the accuracy of random guessing, which would be 10%.
This is the output of a test run:
2021-12-12 17:55:19.402677: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX AVX2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-12-12 17:55:19.802026: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 3617 MB memory: -> device: 0, name: Quadro P2200, pci bus id: 0000:01:00.0, compute capability: 6.1
Epoch 1/10
2021-12-12 17:55:21.475358: I tensorflow/stream_executor/cuda/cuda_dnn.cc:366] Loaded cuDNN version 8301
1563/1563 [==============================] - 9s 5ms/step - loss: 1.5032 - accuracy: 0.4521 - val_loss: 1.2326 - val_accuracy: 0.5559
Epoch 2/10
1563/1563 [==============================] - 7s 5ms/step - loss: 1.1306 - accuracy: 0.5996 - val_loss: 1.0361 - val_accuracy: 0.6318
Epoch 3/10
1563/1563 [==============================] - 8s 5ms/step - loss: 0.9704 - accuracy: 0.6589 - val_loss: 1.0053 - val_accuracy: 0.6517
Epoch 4/10
1563/1563 [==============================] - 7s 5ms/step - loss: 0.8831 - accuracy: 0.6904 - val_loss: 0.8999 - val_accuracy: 0.6883
Epoch 5/10
1563/1563 [==============================] - 7s 5ms/step - loss: 0.8036 - accuracy: 0.7177 - val_loss: 0.8924 - val_accuracy: 0.6956
Epoch 6/10
1563/1563 [==============================] - 7s 5ms/step - loss: 0.7514 - accuracy: 0.7374 - val_loss: 0.9180 - val_accuracy: 0.6903
Epoch 7/10
1563/1563 [==============================] - 7s 5ms/step - loss: 0.7020 - accuracy: 0.7548 - val_loss: 0.8755 - val_accuracy: 0.7074
Epoch 8/10
1563/1563 [==============================] - 7s 5ms/step - loss: 0.6599 - accuracy: 0.7694 - val_loss: 0.8505 - val_accuracy: 0.7116
Epoch 9/10
1563/1563 [==============================] - 8s 5ms/step - loss: 0.6180 - accuracy: 0.7842 - val_loss: 0.8850 - val_accuracy: 0.7058
Epoch 10/10
1563/1563 [==============================] - 8s 5ms/step - loss: 0.5825 - accuracy: 0.7943 - val_loss: 0.8740 - val_accuracy: 0.7128
313/313 - 1s - loss: 0.8740 - accuracy: 0.7128 - 648ms/epoch - 2ms/step
===============================
| Validation accuracy: 71.28% |
===============================
This figure displays the classification accuracy of the CNN on the training images (Accuracy) and on the test images (Validation Accuracy) after each of the 10 training epochs:
Figure 10: CNN image classification accuracy
Install the Qiskit quantum processor software development framework by following the instructions at https://qiskit.org/documentation/getting_started.html. The instructions suggest installation of the Anaconda (https://www.anaconda.com/) data science and machine learning toolset. After installing Anaconda, create a Conda virtual environment named qiskitenv to contain your work on quantum code and install Qiskit in this environment with the command pip install qiskit. Be sure to install the optional visualization dependencies with the command pip install qiskit-terra[visualization].
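Open an Anaconda prompt by typing anaconda in the Windows search box and clicking on Anaconda Prompt when it appears in the search list. A console window will appear.
Create and activate a Conda virtual environment named qiskitenv with the following commands. Install any recommended packages: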
conda create -n qiskitenv python=3.8
conda activate qiskitenv
pip install qiskit
pip install qiskit-terra[visualization]
Create a free IBM Quantum account at https://quantum-computing.ibm.com/. Locate your IBM Quantum Services API token at https://quantum-computing.ibm.com/account and install it in your local environment using the instructions at https://qiskit.org/documentation/getting_started.html.
Activate the qiskitenv environment you created in Exercise 1.
Enter the following commands, replacing MY_TOKEN with the token you copied to the clipboard in step 2:
python
import qiskit
from qiskit import IBMQ
# Install the IBM Quantum Services API token in the local
# environment. Replace MY_TOKEN with your own token first.
IBMQ.save_account('MY_TOKEN')
Work through the example quantum program at https://qiskit.org/documentation/tutorials/circuits/1_getting_started_with_qiskit.html. This example creates a quantum circuit containing 3 qubits that implements a Greenberger–Horne–Zeilinger (GHZ) state. The GHZ state exhibits key properties of quantum entanglement. Execute the code in a simulation environment on your computer.
Type anaconda in the Windows search box and click on Anaconda Prompt when it appears in the search list. A console window will appear.
Activate the qiskitenv environment with this command:
conda activate qiskitenv
python
import numpy as np
from qiskit import *
# Create a quantum circuit containing 3 qubits. The gates added
# below implement the GHZ state described in the exercise.
circ = QuantumCircuit(3)
# Add an H gate to qubit 0, creating superposition
circ.h(0)
# Add a CX (CNOT) gate. Qubit 0 is control and qubit 1 is target
circ.cx(0,1)
# Add a CX (CNOT) gate. Qubit 0 is control and qubit 2 is target
circ.cx(0,2)
# Add a measurement to each of the qubits. The measurements live in
# a second circuit that has 3 qubits and 3 classical bits.
meas = QuantumCircuit(3, 3)
# Place a barrier across all three qubits ahead of the measurements
meas.barrier(range(3))
# Measure qubit i into classical bit i, for i = 0, 1, 2
meas.measure(range(3),range(3))
# Combine the two circuits. circ was created with qubits only, so
# the classical register of meas is added to it first (presumably
# required for compose() to accept the measurements -- confirm
# against the Qiskit documentation).
circ.add_register(meas.cregs[0])
qc = circ.compose(meas)
# Display the combined circuit; at the interactive prompt the
# drawing is echoed to the console
qc.draw()
The output of this command should appear as follows:
>>> qc.draw()
┌───┐ ░ ┌─┐
q_0: ┤ H ├──■────■───░─┤M├──────
└───┘┌─┴─┐ │ ░ └╥┘┌─┐
q_1: ─────┤ X ├──┼───░──╫─┤M├───
└───┘┌─┴─┐ ░ ║ └╥┘┌─┐
q_2: ──────────┤ X ├─░──╫──╫─┤M├
└───┘ ░ ║ ║ └╥┘
c: 3/═══════════════════╩══╩══╩═
0 1 2
>>>
Run the circuit on your computer using the qasm_simulator simulator. The shots parameter provides a count of the number of times the circuit will be executed to collect statistical results:
# Select the qasm_simulator backend
backend_sim = Aer.get_backend('qasm_simulator')
# Transpile the circuit for that backend and execute it 1024 times
job_sim = backend_sim.run(transpile(qc, backend_sim), shots=1024)
# Retrieve the result of the job
result_sim = job_sim.result()
# Number of times each output bit string was observed
counts_sim = result_sim.get_counts(qc)
# At the interactive prompt, a bare name displays its value
counts_sim
>>> counts_sim
{'111': 506, '000': 518}
>>>
Execute the code from Exercise 3 on an IBM quantum computer.
from qiskit import IBMQ
# Load the IBM Quantum account (presumably the credentials stored
# earlier with IBMQ.save_account() -- confirm)
IBMQ.load_account()
# Get the provider for the 'open' group
provider = IBMQ.get_provider(group='open')
# At the interactive prompt, this displays the provider's backends
provider.backends()
Run the circuit on the ibmq_bogota computer. The shots parameter provides a count of the number of times the circuit will be executed to collect statistical results:
# Select the ibmq_bogota backend from the provider
backend = provider.get_backend('ibmq_bogota')
from qiskit.tools.monitor import job_monitor
# Submit the circuit to that backend for 1024 executions
job_exp = execute(qc, backend=backend, shots=1024)
# Report the job status; the text following this listing shows the
# line printed once the run completes
job_monitor(job_exp)
After the run completes, you will see the following output line:
Job Status: job has successfully run
# Retrieve the result of the job
result_exp = job_exp.result()
# Number of times each output bit string was observed
counts_exp = result_exp.get_counts(qc)
# At the interactive prompt, a bare name displays its value
counts_exp
Approximately 50% of the time, the output bit string for this circuit should be 000, and the other 50% of the time it should be 111. However, these systems are noisy, intermediate-scale quantum (NISQ) computers. You should see results similar (but not identical) to these:
>>> counts_exp
{'000': 467, '001': 15, '010': 23, '011': 17, '100': 21, '101': 127, '110': 16, '111': 338}
>>>
Join the book’s Discord workspace for a monthly Ask me Anything session with the author: https://discord.gg/7h8aNRhRuY