diff --git a/assets/HTMFS_Strong_Consistency_Comes_for_Free_with_1682647018871_0.edn b/assets/HTMFS_Strong_Consistency_Comes_for_Free_with_1682647018871_0.edn index e362042..778b3f7 100644 --- a/assets/HTMFS_Strong_Consistency_Comes_for_Free_with_1682647018871_0.edn +++ b/assets/HTMFS_Strong_Consistency_Comes_for_Free_with_1682647018871_0.edn @@ -493,5 +493,478 @@ :height 1900.8}), :page 5}, :content {:text "3.1 HOP"}, - :properties {:color "yellow"}}], + :properties {:color "yellow"}} + {:id #uuid "64533b91-7e2b-448b-bb4f-807cc7cb5a77", + :page 5, + :position {:bounding {:x1 0, + :y1 625.848274230957, + :x2 1041.702880859375, + :y2 1331.830451965332, + :width 1142.3999999999999, + :height 1478.3999999999999}, + :rects ({:x1 761.5968017578125, + :y1 625.848274230957, + :x2 1041.702880859375, + :y2 650.4196853637695, + :width 1142.3999999999999, + :height 1478.3999999999999} + {:x1 592.7053985595703, + :y1 648.160774230957, + :x2 779.3180541992188, + :y2 672.7321853637695, + :width 1142.3999999999999, + :height 1478.3999999999999} + {:x1 0, + :y1 1307.2590408325195, + :x2 0, + :y2 1331.830451965332, + :width 1142.3999999999999, + :height 1478.3999999999999}), + :page 5}, + :content {:text "first perform all reads and invisible writes outside the RTM"}, + :properties {:color "yellow"}} + {:id #uuid "64533cf1-c232-4630-8cf9-88cf4dbfe0c2", + :page 5, + :position {:bounding {:x1 666.8732299804688, + :y1 759.7500305175781, + :x2 788.893310546875, + :y2 784.3214416503906, + :width 1142.3999999999999, + :height 1478.3999999999999}, + :rects ({:x1 666.8732299804688, + :y1 759.7500305175781, + :x2 788.893310546875, + :y2 784.3214416503906, + :width 1142.3999999999999, + :height 1478.3999999999999}), + :page 5}, + :content {:text "sequence count"}, + :properties {:color "yellow"}} + {:id #uuid "64534050-8459-4719-866e-612b76560e00", + :page 6, + :position {:bounding {:x1 100.79464721679688, + :y1 1121.4465026855469, + :x2 299.19744873046875, + :y2 1150.58935546875, + :width 
1142.3999999999999, + :height 1478.3999999999999}, + :rects ({:x1 100.79464721679688, + :y1 1121.4465026855469, + :x2 299.19744873046875, + :y2 1150.58935546875, + :width 1142.3999999999999, + :height 1478.3999999999999} + {:x1 128.6875, + :y1 1124.8750610351562, + :x2 151.02301025390625, + :y2 1147.1875610351562, + :width 1142.3999999999999, + :height 1478.3999999999999}), + :page 6}, + :content {:text "3.2 File Operations"}, + :properties {:color "yellow"}} + {:id #uuid "6453427e-ce5a-4746-960b-45f9546e8b37", + :page 5, + :position {:bounding {:x1 593.3750305175781, + :y1 1169.4732666015625, + :x2 897.7872924804688, + :y2 1194.044677734375, + :width 1142.3999999999999, + :height 1478.3999999999999}, + :rects ({:x1 593.3750305175781, + :y1 1169.4732666015625, + :x2 897.7872924804688, + :y2 1194.044677734375, + :width 1142.3999999999999, + :height 1478.3999999999999}), + :page 5}, + :content {:text "Discussion of concurrency correctness."}, + :properties {:color "yellow"}} + {:id #uuid "64534bc6-4bde-47c6-9fa2-3a085ff59c66", + :page 6, + :position {:bounding {:x1 100.79464721679688, + :y1 1159.5893249511719, + :x2 240.79701232910156, + :y2 1184.1607666015625, + :width 1142.3999999999999, + :height 1478.3999999999999}, + :rects ({:x1 100.79464721679688, + :y1 1159.5893249511719, + :x2 240.79701232910156, + :y2 1184.1607666015625, + :width 1142.3999999999999, + :height 1478.3999999999999} + {:x1 137.99107360839844, + :y1 1163.0178985595703, + :x2 156.61558532714844, + :y2 1181.6161193847656, + :width 1142.3999999999999, + :height 1478.3999999999999}), + :page 6}, + :content {:text "3.2.1 Data Read"}, + :properties {:color "yellow"}} + {:id #uuid "64534cee-e35f-4e07-a79d-9185e36b57a2", + :page 6, + :position {:bounding {:x1 593.3750381469727, + :y1 600.6696472167969, + :x2 738.5257873535156, + :y2 625.2410888671875, + :width 1142.3999999999999, + :height 1478.3999999999999}, + :rects ({:x1 593.3750381469727, + :y1 600.6696472167969, + :x2 738.5257873535156, + :y2 
625.2410888671875, + :width 1142.3999999999999, + :height 1478.3999999999999} + {:x1 630.5625457763672, + :y1 604.0982208251953, + :x2 649.1870574951172, + :y2 622.6964416503906, + :width 1142.3999999999999, + :height 1478.3999999999999}), + :page 6}, + :content {:text "3.2.2 Data Write"}, + :properties {:color "yellow"}} + {:id #uuid "64534d02-3d6b-4dd4-8d3f-e86767190f08", + :page 7, + :position {:bounding {:x1 100.79464721679688, + :y1 430.9553985595703, + :x2 238.2342071533203, + :y2 455.5268096923828, + :width 1142.3999999999999, + :height 1478.3999999999999}, + :rects ({:x1 100.79464721679688, + :y1 430.9553985595703, + :x2 238.2342071533203, + :y2 455.5268096923828, + :width 1142.3999999999999, + :height 1478.3999999999999} + {:x1 137.99107360839844, + :y1 434.38392639160156, + :x2 156.61558532714844, + :y2 452.9821319580078, + :width 1142.3999999999999, + :height 1478.3999999999999}), + :page 7}, + :content {:text "3.2.3 Allocation"}, + :properties {:color "yellow"}} + {:id #uuid "64534d0c-9fe2-4817-b510-5d2da7d55dbf", + :page 7, + :position {:bounding {:x1 100.79464721679688, + :y1 898.3750305175781, + :x2 354.5528564453125, + :y2 927.5179138183594, + :width 1142.3999999999999, + :height 1478.3999999999999}, + :rects ({:x1 100.79464721679688, + :y1 898.3750305175781, + :x2 354.5528564453125, + :y2 927.5179138183594, + :width 1142.3999999999999, + :height 1478.3999999999999} + {:x1 128.6875, + :y1 901.8036193847656, + :x2 151.02301025390625, + :y2 924.1161193847656, + :width 1142.3999999999999, + :height 1478.3999999999999}), + :page 7}, + :content {:text "3.3 Directory Operations"}, + :properties {:color "yellow"}} + {:id #uuid "64534d0f-1882-4633-9cd4-fce9d411aeb9", + :page 7, + :position {:bounding {:x1 100.79464721679688, + :y1 936.0982666015625, + :x2 240.4675750732422, + :y2 960.669677734375, + :width 1142.3999999999999, + :height 1478.3999999999999}, + :rects ({:x1 100.79464721679688, + :y1 936.0982666015625, + :x2 240.4675750732422, + :y2 
960.669677734375, + :width 1142.3999999999999, + :height 1478.3999999999999} + {:x1 137.99107360839844, + :y1 939.52685546875, + :x2 156.61558532714844, + :y2 958.1250610351562, + :width 1142.3999999999999, + :height 1478.3999999999999}), + :page 7}, + :content {:text "3.3.1 Path Walk"}, + :properties {:color "yellow"}} + {:id #uuid "64534d19-b941-4c04-a084-2d8a09d00883", + :page 7, + :position {:bounding {:x1 593.3750381469727, + :y1 465.1161117553711, + :x2 795.0186767578125, + :y2 489.6875228881836, + :width 1142.3999999999999, + :height 1478.3999999999999}, + :rects ({:x1 593.3750381469727, + :y1 465.1161117553711, + :x2 795.0186767578125, + :y2 489.6875228881836, + :width 1142.3999999999999, + :height 1478.3999999999999} + {:x1 630.5625457763672, + :y1 468.54463958740234, + :x2 649.1870574951172, + :y2 487.14290618896484, + :width 1142.3999999999999, + :height 1478.3999999999999}), + :page 7}, + :content {:text "3.3.2 Directory Updates"}, + :properties {:color "yellow"}} + {:id #uuid "64534d23-1327-40ef-b58e-7c5a01559552", + :page 8, + :position {:bounding {:x1 100.79464721679688, + :y1 504.79463958740234, + :x2 310.562255859375, + :y2 533.9375228881836, + :width 1142.3999999999999, + :height 1478.3999999999999}, + :rects ({:x1 100.79464721679688, + :y1 504.79463958740234, + :x2 310.562255859375, + :y2 533.9375228881836, + :width 1142.3999999999999, + :height 1478.3999999999999} + {:x1 128.6875, + :y1 508.22322845458984, + :x2 151.02301025390625, + :y2 530.5357284545898, + :width 1142.3999999999999, + :height 1478.3999999999999}), + :page 8}, + :content {:text "3.4 Other File Types"}, + :properties {:color "yellow"}} + {:id #uuid "64534d33-2bfe-4817-a5bd-0cc3534584d7", + :page 8, + :position {:bounding {:x1 100.79464721679688, + :y1 640.2678833007812, + :x2 309.95745849609375, + :y2 669.4107360839844, + :width 1142.3999999999999, + :height 1478.3999999999999}, + :rects ({:x1 100.79464721679688, + :y1 640.2678833007812, + :x2 309.95745849609375, + :y2 
669.4107360839844, + :width 1142.3999999999999, + :height 1478.3999999999999} + {:x1 128.6875, + :y1 643.6964416503906, + :x2 151.02301025390625, + :y2 666.0089416503906, + :width 1142.3999999999999, + :height 1478.3999999999999}), + :page 8}, + :content {:text "3.5 The Timestamps"}, + :properties {:color "yellow"}} + {:id #uuid "64534d37-b14f-4943-92c0-7bb4823fbd50", + :page 8, + :position {:bounding {:x1 100.79464721679688, + :y1 1088.96435546875, + :x2 404.5784912109375, + :y2 1118.107177734375, + :width 1142.3999999999999, + :height 1478.3999999999999}, + :rects ({:x1 100.79464721679688, + :y1 1088.96435546875, + :x2 404.5784912109375, + :y2 1118.107177734375, + :width 1142.3999999999999, + :height 1478.3999999999999} + {:x1 128.6875, + :y1 1092.3929443359375, + :x2 151.02301025390625, + :y2 1114.7054443359375, + :width 1142.3999999999999, + :height 1478.3999999999999}), + :page 8}, + :content {:text "3.6 The Special Case: Rename"}, + :properties {:color "yellow"}} + {:id #uuid "64534d45-e498-427d-b4ef-74eaf56f7aae", + :page 9, + :position {:bounding {:x1 100.79464721679688, + :y1 498.08929443359375, + :x2 286.7929382324219, + :y2 527.232177734375, + :width 1142.3999999999999, + :height 1478.3999999999999}, + :rects ({:x1 100.79464721679688, + :y1 498.08929443359375, + :x2 286.7929382324219, + :y2 527.232177734375, + :width 1142.3999999999999, + :height 1478.3999999999999} + {:x1 111.95536041259766, + :y1 501.51788330078125, + :x2 134.29086303710938, + :y2 523.8303833007812, + :width 1142.3999999999999, + :height 1478.3999999999999}), + :page 9}, + :content {:text "4 Implementation"}, + :properties {:color "yellow"}} + {:id #uuid "645357b1-b9ab-456c-87b3-c1047ec5ce59", + :page 6, + :position {:bounding {:x1 0, + :y1 811.6785888671875, + :x2 1041.711669921875, + :y2 1414.8214721679688, + :width 1142.3999999999999, + :height 1478.3999999999999}, + :rects ({:x1 947.4280700683594, + :y1 811.6785888671875, + :x2 1041.711669921875, + :y2 836.2500305175781, + :width 
1142.3999999999999, + :height 1478.3999999999999} + {:x1 593.3750381469727, + :y1 834.0000305175781, + :x2 1041.6945877075195, + :y2 858.5714416503906, + :width 1142.3999999999999, + :height 1478.3999999999999} + {:x1 593.3750381469727, + :y1 837.4285888671875, + :x2 1041.6945877075195, + :y2 856.0268249511719, + :width 1142.3999999999999, + :height 1478.3999999999999} + {:x1 593.3750381469727, + :y1 856.3125305175781, + :x2 730.7262649536133, + :y2 880.8839416503906, + :width 1142.3999999999999, + :height 1478.3999999999999} + {:x1 0, + :y1 1373.65185546875, + :x2 0, + :y2 1398.2232666015625, + :width 1142.3999999999999, + :height 1478.3999999999999} + {:x1 0, + :y1 1390.2500610351562, + :x2 0, + :y2 1414.8214721679688, + :width 1142.3999999999999, + :height 1478.3999999999999}), + :page 6}, + :content {:text "convert data updates to metadata updates that can be embedded in the RTM transactions."}, + :properties {:color "yellow"}} + {:id #uuid "64535855-08bc-484d-951e-6cf191d5729f", + :page 6, + :position {:bounding {:x1 611.9732513427734, + :y1 879.3661346435547, + :x2 708.9877014160156, + :y2 903.9375610351562, + :width 1142.3999999999999, + :height 1478.3999999999999}, + :rects ({:x1 611.9732513427734, + :y1 879.3661346435547, + :x2 708.9877014160156, + :y2 903.9375610351562, + :width 1142.3999999999999, + :height 1478.3999999999999}), + :page 6}, + :content {:text "Small writes, "}, + :properties {:color "yellow"}} + {:id #uuid "64535ce3-9927-4e22-b2ed-9c396c3b43f2", + :page 6, + :position {:bounding {:x1 0, + :y1 1214.1072082519531, + :x2 1041.6976928710938, + :y2 1696.9911499023438, + :width 1142.3999999999999, + :height 1478.3999999999999}, + :rects ({:x1 802.2041625976562, + :y1 1214.1072082519531, + :x2 1041.6976928710938, + :y2 1238.6786499023438, + :width 1142.3999999999999, + :height 1478.3999999999999} + {:x1 593.3750381469727, + :y1 1236.4197082519531, + :x2 748.9095840454102, + :y2 1260.9911499023438, + :width 1142.3999999999999, + :height 
1478.3999999999999} + {:x1 0, + :y1 1672.4197387695312, + :x2 0, + :y2 1696.9911499023438, + :width 1142.3999999999999, + :height 1478.3999999999999}), + :page 6}, + :content {:text "But the blocks may have leaked after a system crash. "}, + :properties {:color "red"}} + {:id #uuid "64535d27-c876-4481-856e-8962e5ba9b16", + :page 7, + :position {:bounding {:x1 257.77557373046875, + :y1 485.0536003112793, + :x2 436.4731140136719, + :y2 509.6250114440918, + :width 1142.3999999999999, + :height 1478.3999999999999}, + :rects ({:x1 257.77557373046875, + :y1 485.0536003112793, + :x2 436.4731140136719, + :y2 509.6250114440918, + :width 1142.3999999999999, + :height 1478.3999999999999}), + :page 7}, + :content {:text " temporal allocating list"}, + :properties {:color "yellow"}} + {:id #uuid "64535f97-b733-4522-a2d1-dbfc8837799e", + :page 7, + :position {:bounding {:x1 0, + :y1 327.9643020629883, + :x2 549.4309692382812, + :y2 733.5357360839844, + :width 1142.3999999999999, + :height 1478.3999999999999}, + :rects ({:x1 0, + :y1 327.9643020629883, + :x2 0, + :y2 352.5357322692871, + :width 1142.3999999999999, + :height 1478.3999999999999} + {:x1 0, + :y1 344.5625171661377, + :x2 0, + :y2 369.1339473724365, + :width 1142.3999999999999, + :height 1478.3999999999999} + {:x1 338.12518310546875, + :y1 664.3303833007812, + :x2 549.4309692382812, + :y2 688.9018249511719, + :width 1142.3999999999999, + :height 1478.3999999999999} + {:x1 100.79464721679688, + :y1 686.6518249511719, + :x2 549.1180419921875, + :y2 711.2232360839844, + :width 1142.3999999999999, + :height 1478.3999999999999} + {:x1 100.79464721679688, + :y1 690.0803833007812, + :x2 549.1180419921875, + :y2 708.6786193847656, + :width 1142.3999999999999, + :height 1478.3999999999999} + {:x1 100.79464721679688, + :y1 708.9643249511719, + :x2 408.1436767578125, + :y2 733.5357360839844, + :width 1142.3999999999999, + :height 1478.3999999999999}), + :page 7}, + :content {:text "the file system crashes after a reference to a 
block of data has been removed (when this block of memory has not yet been freed)"}, + :properties {:color "red"}}], :extra {:page 7}} diff --git a/journals/2023_05_04.md b/journals/2023_05_04.md new file mode 100644 index 0000000..3058a40 --- /dev/null +++ b/journals/2023_05_04.md @@ -0,0 +1,3 @@ +- TODO Paper1-HTMFS Implementation 看完 +- TODO 模式识别作业提交 +- TODO 写一会编译器 \ No newline at end of file diff --git a/logseq/bak/logseq/custom/2023-04-30T03_42_43.088Z.Desktop.css b/logseq/bak/logseq/custom/2023-05-04T04_57_33.197Z.Desktop.css similarity index 99% rename from logseq/bak/logseq/custom/2023-04-30T03_42_43.088Z.Desktop.css rename to logseq/bak/logseq/custom/2023-05-04T04_57_33.197Z.Desktop.css index 849f4f6..2a7e5d8 100644 --- a/logseq/bak/logseq/custom/2023-04-30T03_42_43.088Z.Desktop.css +++ b/logseq/bak/logseq/custom/2023-05-04T04_57_33.197Z.Desktop.css @@ -317,13 +317,14 @@ mark { font-size: inherit; text-decoration-style: solid; text-decoration: underline; - text-decoration-color: var(--ls-primary-background-color); + text-decoration-color: rgba(255,198,0,0.7); text-decoration-thickness: 4px; text-underline-offset: -1px; text-decoration-skip-ink: none; } - - +html[data-theme="dark"] mark { + text-decoration-color: rgba(255,198,0,0.7); +} /* fix color page-reference when mark text */ mark .page-ref, mark .page-reference .bracket { diff --git a/logseq/bak/pages/hls__Computer_Organization_and_Design_1681729306797_0/2023-05-04T04_18_30.202Z.Desktop.md b/logseq/bak/pages/hls__Computer_Organization_and_Design_1681729306797_0/2023-05-04T04_18_30.202Z.Desktop.md new file mode 100644 index 0000000..cc1fe32 --- /dev/null +++ b/logseq/bak/pages/hls__Computer_Organization_and_Design_1681729306797_0/2023-05-04T04_18_30.202Z.Desktop.md @@ -0,0 +1,1448 @@ +file:: [Computer_Organization_and_Design_1681729306797_0.pdf](../../../../assets/Computer_Organization_and_Design_1681729306797_0.pdf) +file-path:: ../../../../assets/Computer_Organization_and_Design_1681729306797_0.pdf + 
+- # Computer Abstractions and Technology + ls-type:: annotation + hl-page:: 25 + hl-color:: yellow + id:: 643d2848-6edf-4c05-92c7-4a7de1b9cd22 +- Classes of Computing Applications and Their Characteristics + ls-type:: annotation + hl-page:: 28 + hl-color:: yellow + id:: 643e2b9c-0bc2-4b02-b2b1-33e25539d5b9 +- Below Your Program + ls-type:: annotation + hl-page:: 36 + hl-color:: yellow + id:: 643ea1cf-af0e-45ba-97b3-376fd21ee1e3 + collapsed:: true + - From a High-Level Language to the Language of Hardware + ls-type:: annotation + hl-page:: 37 + hl-color:: yellow + id:: 643ea1d7-cd7d-4e81-8d6f-c268aab04f68 +- Under the Covers + ls-type:: annotation + hl-page:: 39 + hl-color:: yellow + id:: 643ea295-e170-403a-a43d-71777bb41d9b + collapsed:: true + - The five classic components of a computer are input, output, memory, datapath, and control + ls-type:: annotation + hl-page:: 40 + hl-color:: yellow + id:: 643ea2f7-1fa7-4c12-ad2d-34a90d6968b7 + - liquid crystal displays (LCDs) + hl-page:: 41 + ls-type:: annotation + id:: 643ea91c-7643-4563-9341-f85096313a3b + hl-color:: yellow + - The LCD is not the source of light but instead controls the transmission of light. There is a background light source and the LCD has many rods which bend light to make it pass through. When a current is applied, the rods no longer bend the light, thus controlling the pixel. 
+ - an active matrix that has a tiny transistor switch at each pixel to precisely control current and make sharper images + ls-type:: annotation + hl-page:: 41 + hl-color:: yellow + id:: 643ead73-e1a6-4f10-82c5-2760a4ce839f + - instruction set architecture + hl-page:: 45 + ls-type:: annotation + id:: 643eb029-9fe9-4013-a4a2-1365e195333b + hl-color:: yellow + - interface between the hardware and low-level software, distinguish architecture from implementation +- Technologies for Building Processors and Memory + ls-type:: annotation + hl-page:: 47 + hl-color:: yellow + id:: 643eb311-6b10-4fa3-9aa3-dfd5a59acf2c + collapsed:: true + - Semiconductor, silicon: add materials to silicon that allow tiny areas to transform into one of three devices: Excellent conductor, Excellent insulator and Transistor (conduct/insulate under certain conditions) + hl-page:: 48 + ls-type:: annotation + id:: 643eb66d-0294-46ab-a182-20021b2495c5 + hl-color:: yellow + - Silicon ingot sliced into Blank wafers, processed into Patterned wafers, and then Tested wafer, diced into Tested dies, bonded to package, finally Tested packaged dies + - die: Rectangular sections cut from a wafer (actually chip) + - yield: Percentage of good dies from the total dies on the wafer +- Performance + ls-type:: annotation + hl-page:: 51 + hl-color:: yellow + id:: 643ec3be-027e-48b5-90e0-cd4a5e901691 + collapsed:: true + - response/execution time: time between the start and completion of a task + hl-page:: 52 + ls-type:: annotation + id:: 643fb234-d566-4913-a03b-c574e6a623c4 + hl-color:: yellow + - $\text{Performance}_X = \frac{1}{\text{Execution time}_X}$ + - Relative Performance: A is ==n times as fast as== B, which means the same program takes 1/n of the time on A that it takes on B + hl-page:: 54 + ls-type:: annotation + id:: 643fe045-80bb-4c47-b601-3fdc9175581d + hl-color:: yellow + - throughput: the total amount of work done in a given time + hl-page:: 53 + ls-type:: annotation + id:: 643fb242-9401-42fa-bddc-d93e571b6e99 + 
hl-color:: yellow + - Measuring Performance + ls-type:: annotation + hl-page:: 55 + hl-color:: yellow + id:: 6440d1fd-a2c1-4d4c-a493-3b5c7d91448e + - Elapsed time: total time to complete a task, including RAM access, IO and other overhead. + - CPU time: time the CPU spends computing for this task; it does not include IO or waiting to be scheduled + - user CPU time + - system CPU time: time the OS spends performing tasks on behalf of the program (syscall?) + - CPU Performance and Its Factors + ls-type:: annotation + hl-page:: 56 + hl-color:: yellow + id:: 6440d4c6-9dc2-4712-9d25-6386325581e5 + - clock cycles: discrete time intervals + - clock period: length of a clock cycle + - clock rate: inverse of the clock period + - For a specific program, CPU time = CPU clock cycles $\times$ Clock cycle time = CPU clock cycles $\div$ Clock rate + - Instruction Performance + ls-type:: annotation + hl-page:: 58 + hl-color:: yellow + id:: 6440d70f-cde0-41ca-af1b-82d5a777a7a8 + - CPU clock cycles = Instruction count $\times$ CPI + - CPI (clock Cycles Per Instruction): average number of cycles each instruction takes to execute (for one program) + - compare two different implementations of the same ISA + - The Classic CPU Performance Equation + ls-type:: annotation + hl-page:: 59 + hl-color:: yellow + id:: 6440d99c-e080-4352-8c2d-7d35e897a2ee + - CPU time = Instruction count $\times$ CPI $\div$ Clock rate + - The formula separates the 3 key factors affecting performance + - The only complete and reliable measure of computer performance is time. 
+ hl-page:: 61 + ls-type:: annotation + id:: 6440dc59-a80f-4185-9692-8e4122cad4b4 + hl-color:: yellow + - CPI depends on a wide variety of design details in the computer + hl-page:: 61 + ls-type:: annotation + id:: 6440dcd2-ae75-4a59-a2c5-aff6e3bf7953 + hl-color:: yellow +- The Power Wall + ls-type:: annotation + hl-page:: 63 + hl-color:: yellow + id:: 6440df39-4ac7-4efc-b59a-64db6354ca0f + collapsed:: true + - dynamic energy: The energy consumed when transistors switch states, primary source of energy consumption for CMOS. + - The energy of a single transition: $\text{Energy} \propto \frac12 \times \text{Capacitive load} \times \text{Voltage}^2$ + - The power required per transistor: $\text{Power} \propto \frac12 \times \text{Capacitive load} \times \text{Voltage}^2 \times \text{Frequency switched}$ + - Frequency switched is a function of the *clock rate* + - Capacitive load is a function of *fanout* (number of transistors connected to an output) and the technology (capacitance of wires and transistors). + - Main way to reduce power is to lower the *voltage*. + - There is a problem with low voltage: this makes the capacitor leakage increase (static energy). +- The Sea Change: The Switch from Uniprocessors to Multiprocessors + ls-type:: annotation + hl-page:: 66 + hl-color:: yellow + id:: 6440e9ca-16a4-48ed-9648-adafb74ce097 + collapsed:: true + - This section is about the difficulty of parallel programming and related material. 
+- Fallacies and Pitfalls + ls-type:: annotation + hl-page:: 72 + hl-color:: yellow + id:: 6440ea56-fdd5-426f-9a76-d5b5a7465c55 + collapsed:: true + - Amdahl's Law: $\text{Execution time after improvement} = \frac{\text{Execution time affected by improvement} }{\text{Amount of improvement}} + \text{Execution time unaffected}$ + hl-page:: 72 + ls-type:: annotation + id:: 6441179d-ae59-49b4-8903-874cb5b7c9cd + hl-color:: yellow + - Thus, we CANNOT expect ==improvement of one aspect== of a computer to ==increase overall performance by an amount proportional== to the size of improvement. + - Computers at low utilization don't necessarily use little power, or in other words, power consumption is not proportional to the system's load. + hl-page:: 73 + ls-type:: annotation + id:: 6441212c-596a-463c-a47a-04478b16268b + hl-color:: yellow + - MIPS (million instructions per second) = $\frac{\text{Instruction count}}{\text{Execution time} \times 10^6} = \frac{\text{Clock rate}}{\text{CPI} \times 10^6}$ + hl-page:: 74 + ls-type:: annotation + id:: 64412369-dd7a-4a26-9979-be7179f38df6 + hl-color:: yellow + - Problem 1: it doesn't take into account the *Instruction count*, or the capability of each instruction. We should not compare computers with different ISAs. + - Problem 2: MIPS varies between programs even on the same computer. + - Problem 3: MIPS can vary independently from performance. 
+- Word List 1 + collapsed:: true + - omnipresent 无所不在的 ubiquitous + hl-page:: 27 + ls-type:: annotation + id:: 643e2b82-f5a5-411e-9571-d494858c175a + hl-color:: green + - credo 信条,教义 + ls-type:: annotation + hl-page:: 30 + hl-color:: green + id:: 643e473a-2f03-419b-ad3a-8309c33dff15 + - unraveling 解开;阐明; + hl-page:: 31 + ls-type:: annotation + id:: 643e47b3-cc6c-4fd1-83a9-0510b16a5e9c + hl-color:: green + - acronyms 首字母缩略词 + ls-type:: annotation + hl-page:: 32 + hl-color:: green + id:: 643e485f-8de8-41bf-86ac-812ba202f4c8 + - leverage 影响力;杠杆作用 + hl-page:: 33 + ls-type:: annotation + id:: 643e4871-3ebb-4578-9227-b40a534adeac + hl-color:: green + - intrinsic 固有的, 内在的, 本质的 + ls-type:: annotation + hl-page:: 33 + hl-color:: green + id:: 643e4882-a5ea-4bff-9b5f-17f585313142 + - weave 编织;杜撰 + hl-page:: 34 + ls-type:: annotation + id:: 643e492d-5e63-4b9b-93f7-4f44bf50158e + hl-color:: green + - rod 杆;竿;棒 + ls-type:: annotation + hl-page:: 41 + hl-color:: green + id:: 643ea931-ff7e-4bd7-96d1-b7eab4dcc563 + - helix n. 
螺旋 + hl-page:: 41 + ls-type:: annotation + id:: 643ea93a-fa50-486a-b74a-d96f2a4df9aa + hl-color:: green + - raster 光栅 + ls-type:: annotation + hl-page:: 41 + hl-color:: green + id:: 643ea8f8-7e5f-42e3-a04a-01cd91f25d13 + - brawn 体力;发达的肌肉 + ls-type:: annotation + hl-page:: 42 + hl-color:: green + id:: 643eaede-f413-4717-9136-e28363909bb3 + - quadruple 四倍的;四重的; + hl-page:: 48 + ls-type:: annotation + id:: 643eb37e-2927-4100-b8b3-c76bbe5450f4 + hl-color:: green + - slam 砰地关上(门或窗);抨击 + hl-page:: 65 + ls-type:: annotation + id:: 6440e306-7625-4883-b3f0-fbdca42d92e3 + hl-color:: green + - faucet 水龙头 + ls-type:: annotation + hl-page:: 65 + hl-color:: green + id:: 6440e5e4-e02f-43e3-9311-64f8b6d67f75 + - unwieldy + ls-type:: annotation + hl-page:: 65 + hl-color:: green + id:: 6440e73c-8061-47b2-bc37-522db24f1707 + - startling + ls-type:: annotation + hl-page:: 66 + hl-color:: green + id:: 6440e8c9-0024-4751-8233-43e8cea16699 + - stiffer + ls-type:: annotation + hl-page:: 68 + hl-color:: green + id:: 6440e99d-0c7d-4acb-9ba7-9172e1d383d8 + - ensnared + ls-type:: annotation + hl-page:: 72 + hl-color:: green + id:: 6440ec21-aef2-4494-8b50-d16a83c0d9bb + - corollary + ls-type:: annotation + hl-page:: 72 + hl-color:: green + id:: 6441170f-b51b-453a-b612-0b71b2b6032d + - demoralize + ls-type:: annotation + hl-page:: 72 + hl-color:: green + id:: 64411718-be78-469d-9b83-0dfd9c83338b + - plague + ls-type:: annotation + hl-page:: 72 + hl-color:: green + id:: 64411720-4c6a-49ce-8a72-4aa19e7b8482 + - preclude + ls-type:: annotation + hl-page:: 75 + hl-color:: green + id:: 6440ebb6-a98d-465f-8345-3da49486f653 + - constituent + ls-type:: annotation + hl-page:: 75 + hl-color:: green + id:: 6440ebc6-59b1-4b7e-ae67-75559989873b + - impeachable + ls-type:: annotation + hl-page:: 75 + hl-color:: green + id:: 6440ebd7-8c0c-4a18-9d86-bd95714f58ac +- # Instructions: Language of the Computer + ls-type:: annotation + hl-page:: 83 + hl-color:: yellow + id:: 64412821-6b54-47a0-9317-a4b042989fdf +- 
Operations of the Computer Hardware + ls-type:: annotation + hl-page:: 86 + hl-color:: yellow + id:: 64412ca1-c9b5-4d6b-ba19-d353992dd2f1 + collapsed:: true + - Three-operand arithmetic instructions +- Operands of the Computer Hardware + ls-type:: annotation + hl-page:: 89 + hl-color:: yellow + id:: 64412cc0-59a8-4a38-9094-1a7bd916a41f + collapsed:: true + - Registers, where operands of arithmetic instructions must reside + - Register size is a *word* (32 bit) + - 32 registers in MIPS. + - fewer registers to keep clock cycles fast (though 31 regs may not be faster than 32 regs) + - instruction format (5-bit field for register number) + - data transfer instructions + ls-type:: annotation + hl-page:: 91 + hl-color:: yellow + id:: 64412fc0-7ad1-4c8a-9c03-c198e741605b + - memory to register or inverse + - alignment restriction: *word*s must start at addresses that are multiples of 4. As a result, there are some restrictions on the address for `lw/sw` + hl-page:: 92 + ls-type:: annotation + id:: 6441425c-134d-449d-ae39-4db48a67054c + hl-color:: yellow + - memory is addressed by byte, remember this especially when dealing with array indices because the type of array elements decides the offset. + - MIPS is in the big-endian camp (though the textbook says so, the latest MIPS32 by default is little endian) + hl-page:: 93 + ls-type:: annotation + id:: 64414940-7d06-4339-af0b-974b1b34dbc5 + hl-color:: yellow + - Constant or Immediate Operands + ls-type:: annotation + hl-page:: 95 + hl-color:: yellow + id:: 64414a51-2d38-48ff-b0c0-bc53f9c5fadb + - Constant operands occur frequently, and by ==including constants inside arithmetic instructions==, operations are much ==faster== and use ==less energy== than if constants were ==loaded from memory==. + hl-page:: 95 + ls-type:: annotation + id:: 64414af2-05ea-4a88-baf6-a19462b4c3a9 + hl-color:: yellow + - Since MIPS supports ==negative constants==, there is no need for subtract immediate in MIPS. 
+ ls-type:: annotation + hl-page:: 96 + hl-color:: yellow + id:: 64414b4e-cf31-4e7f-8320-2f1bbcbf9b32 +- Signed and Unsigned Numbers + ls-type:: annotation + hl-page:: 96 + hl-color:: yellow + id:: 64414b5f-de73-4bbc-812d-8ebd0f082ea0 + collapsed:: true + - binary digits + hl-page:: 96 + ls-type:: annotation + id:: 64414bb1-c764-493b-b555-4e241a31f255 + hl-color:: yellow + - value of `i`th digit: $d \times \text{Base}^i$ + - LSB and MSB + - Numbers have an infinite number of digits, binary bit patterns are simply representatives of numbers. Thus, there are various ways of handling *overflow*. + hl-page:: 97 + ls-type:: annotation + id:: 64414c34-4dc9-4127-9938-faf0374b6c29 + hl-color:: yellow + - Signed numbers + - **sign and magnitude**: add a separate sign bit. Problems with this approach, need an extra step to set the sign during calculation, negative and positive zero + hl-page:: 98 + ls-type:: annotation + id:: 64414d4d-71a4-44df-9475-d710bfee40d3 + hl-color:: yellow + - **two's complement** + - the value of this form can be written as $(d_{31} \cdot -2^{31}) + d_{30} \cdot 2^{30} + \dots$, note the first $-2^{31}$ + id:: 64414f1b-142c-4301-a5a2-6dc0ad3b102b + - **one's complement**: negate operation is to simply invert each bit + - sign extension: copy the sign repeatedly to fill the rest of the register when loading from memory + hl-page:: 99 + ls-type:: annotation + id:: 64414fbb-4773-4332-bf21-f533847d0bde + hl-color:: yellow + - This trick works because positive 2's complement numbers really have an infinite number of 0s on the left and negative 2's complement numbers have an infinite number of 1s. The binary bit pattern representing a number hides leading bits to fit the width of the hardware; sign extension simply restores some of them. 
+ hl-page:: 101 + ls-type:: annotation + id:: 64415085-a9e5-4193-a5d1-9c90f5d63ea8 + hl-color:: yellow +- Representing Instructions in the Computer + ls-type:: annotation + hl-page:: 103 + hl-color:: yellow + id:: 64414d2f-3ea8-45a6-9a7a-b84f74a554cf + collapsed:: true + - MIPS Fields + ls-type:: annotation + hl-page:: 105 + hl-color:: yellow + id:: 64415179-3df0-431e-9e53-8608796931dd + - In order to keep the instructions regular (aligned by word), MIPS has irregular layouts for different types of instructions. + - R-type: `op | rs | rt | rd | shamt | funct` + - I-type: `op | rs | rt | constant/address` + - The 16-bit address means a `lw` can only load from a region of $\pm 2^{15}$ bytes of the base register. + - here `rt` serves as the destination register +- Logical Operations + ls-type:: annotation + hl-page:: 110 + hl-color:: yellow + id:: 64415118-595d-4125-b641-333d82a58006 + collapsed:: true + - `sll` and `srl`, use the `shamt` (shift amount) field + - `andi` `ori` extend their 16-bit constant field by filling 0s + - there is no exact instruction for bitwise not, but a `nor` (not or, `a NOR b = NOT(a OR b)`) instruction (perhaps in order to keep the 3-operand format) +- Instructions for Making Decisions + ls-type:: annotation + hl-page:: 113 + hl-color:: yellow + id:: 644154b4-a07e-46fd-aa88-178297b61434 + collapsed:: true + - conditional branches: `bne` and `beq` + hl-page:: 113 + ls-type:: annotation + id:: 644156a7-b2b5-4010-97e3-a432f077cd33 + hl-color:: yellow + - Loops + ls-type:: annotation + hl-page:: 115 + hl-color:: yellow + id:: 64415778-92af-4d30-b3b4-0b3dddd397f4 + - `slt` and `slti`: if `rs < rt`/`rs < imm` then `rd=1` else `rd=0` + - MIPS assemblers use the combination `slt/slti` and `beq/bne` and `$zero` to create all relative conditions + - `sltu/sltiu` signed and unsigned comparison are different, thus an unsigned version is provided + - Case/Switch Statement: *jump address table* and `jr` instruction (the runtime destination address is stored 
in register) + hl-page:: 118 + ls-type:: annotation + id:: 644159a7-3b96-4924-8260-0cb300307c86 + hl-color:: yellow +- Supporting Procedures in Computer Hardware + ls-type:: annotation + hl-page:: 119 + hl-color:: yellow + id:: 644156f5-e485-4140-be11-6ef87a585383 + collapsed:: true + - `jal` jumps to an address and simultaneously saves the address of the following instruction in `$ra` + - `jr` jumps to the address specified in a register + - Calling convention for registers: + - `$a0-$a3`: four argument registers in which to pass parameters + - `$v0–$v1`: two value registers in which to return values + - `$ra`: one return address register to return to the point of origin + - `$t0–$t9`: temporary registers that are not preserved by the *callee* on a procedure call + id:: 644163d9-8d4b-43e8-acec-57a835c4ce48 + - `$s0–$s7`: saved registers that must be preserved on a procedure call (if used, *callee* saves and restores them) + - `$sp`: stack pointer to the most recently allocated address, `push` subtracts from `$sp` and `pop` adds to `$sp` + - `$fp`: frame pointer to the first word of the frame of a procedure + - `$gp`: pointer to global static data + - FIGURE 2.11 What is and what is not preserved across a procedure call.
+ ls-type:: annotation + hl-page:: 125 + hl-color:: yellow + id:: 64416615-2966-4adb-a524-845337e588d3 + - Allocating Space for New Data on the Stack + ls-type:: annotation + hl-page:: 126 + hl-color:: yellow + id:: 6441667f-2639-458b-91fd-8bf6b5a2c6ae + - stack is also used to store variables that are local to the procedure but do not fit in registers + - procedure frame or activation record + ls-type:: annotation + hl-page:: 126 + hl-color:: yellow + id:: 64416688-3f6e-444e-b265-3e4a36ec51b8 + - a frame pointer offers a stable base register within a procedure for local memory-references, in that stack pointer changes during the procedure +- ASCII and String + hl-page:: 129 + ls-type:: annotation + id:: 644167c0-1ac2-42df-b01d-4fff03a393e7 + hl-color:: yellow + collapsed:: true + - `lb/lbu` and `sb` load/store the right most byte, `lh/lhu` and `sh` load/store the lower half word + - Some notes about how to organize a string + - character size + - length or end mark +- MIPS Addressing for 32-bit Immediates and Addresses + ls-type:: annotation + hl-page:: 134 + hl-color:: yellow + id:: 6441f1a7-4442-466f-9edc-776f6e0e6ecb + collapsed:: true + - 32-Bit Immediate Operands: `lui` loads a half word to the upper 16 bits of a register, and then a `ori` sets the lower 16 bits, thus loading a 32-bit immediate + hl-page:: 135 + ls-type:: annotation + id:: 6441fb16-91e4-42d8-94bf-5718dd9fc91b + hl-color:: yellow + - Addressing in Branches and Jumps + ls-type:: annotation + hl-page:: 136 + hl-color:: yellow + id:: 64427722-3a05-43f1-b00e-7dcc30cc74ff + - J-type instruction: `op | address (26 bits)` + - Since MIPS instructions are all 4-byte aligned, the unit of the address in PC-relative addressing is actually *word*. For example, 16-bit address in branch instruction actually represents an 18-bit address. + - MIPS Addressing Mode + ls-type:: annotation + hl-page:: 139 + hl-color:: yellow + id:: 64427a40-988f-4430-aaa5-84d3926c6234 + - 1. 
Immediate addressing: the operand is a constant within the instruction itself (e.g., `addi $rd, $rs, 4`) + 2. Register addressing: the operand is a register (e.g., `add $rd, $rs, $rt`) + 3. Base (displacement) addressing: the operand is at the memory location whose address is the sum of a register and a constant in the instruction (e.g., `lw $rd, 4($rs)`) + 4. PC-relative addressing: the branch address is the sum of the PC and a constant in the instruction (e.g., `beq $rs, $rt, #addr`) + 5. Pseudo-direct addressing: the jump address is the 26 bits of the instruction concatenated with the upper bits of the PC (e.g., `j #addr`) +- Parallelism and Instructions: Synchronization + ls-type:: annotation + hl-page:: 144 + hl-color:: yellow + id:: 644284b0-dd95-46e5-829f-4509200e5f8d + collapsed:: true + - a set of hardware primitives with the ability to atomically read and modify a memory location + hl-page:: 144 + ls-type:: annotation + id:: 64428547-07fb-49e2-9e3f-a7ac95b7e8e0 + hl-color:: yellow + - **atomic exchange**: interchange a value in a register for a value in memory + - Introduces some challenges in the processor design + - `while (xchg(&lock, 1) == 1) ;` + - **MIPS `ll/sc`**: a pair of instructions in which the second instruction returns a value showing whether the pair of instructions executed as if they were one atomic instruction. + - `while (ll(&lock) == 1 || sc(&lock, 1) == 0) ;` (keep spinning while the lock is held or the `sc` fails; `sc` returns 1 on success) + - `sc` will fail after either another attempted store to the `ll`ed address or ==any exception==. It is possible to create deadlock where `sc` can never complete due to repeated page faults. +- Translating and Starting a Program + ls-type:: annotation + hl-page:: 146 + hl-color:: yellow + id:: 64428b6e-1dba-4bd4-ab91-d18ae9cb9cf6 + collapsed:: true + - Assembler + ls-type:: annotation + hl-page:: 147 + hl-color:: yellow + id:: 64428b72-2291-4380-a2ad-dc5cdc82315e + - pseudoinstructions: assembler translates these instructions into equivalent machine instructions.
Register `$at` is reserved for such translations. + hl-page:: 147 + ls-type:: annotation + id:: 64428b7b-0678-4fc3-a105-71fdc6185144 + hl-color:: yellow + - Example 1: `move $t0, $t1` -> `add $t0, $t1, $zero` + - Example 2: `blt $t0, $t1, LABEL1` -> `slt $at, $t0, $t1; bne $at, $zero, LABEL1` + - The assembler turns the assembly language program into an *object file*, which is a combination of ==machine language instructions==, ==data==, and information needed to place instructions properly in memory (==symbol table==, ==relocation information==). + hl-page:: 148 + ls-type:: annotation + id:: 64428ce2-cd44-4d6d-a311-dc7aa3f656ab + hl-color:: yellow + - The object file for UNIX systems typically contains six distinct pieces: *object file header*, *text segment*, *static data segment*, *relocation information*, *symbol table*, and *debug information* + hl-page:: 148 + ls-type:: annotation + id:: 64428f56-bd42-4cc4-a4dd-bf8b05e76fc3 + hl-color:: yellow + - Linker + ls-type:: annotation + hl-page:: 149 + hl-color:: yellow + id:: 64428f9a-632e-4bdb-80d5-56febc2bb977 + - Re-compiling the whole program at each change to a single procedure is a huge waste, so compile/assemble each procedure independently and finally link them together. + - 3 steps for the linker: + - hl-page:: 149 + ls-type:: annotation + id:: 6442909f-b0b5-4706-8454-f89466e3ff7e + hl-color:: yellow + 1. Place code and data modules symbolically in memory. + 2. Determine the addresses of data and instruction ==labels==. + 3. Patch both the internal and external ==references==. + - Example Problem: Linking Object Files + hl-page:: 150 + ls-type:: annotation + id:: 6442957a-fb16-4069-8e74-d470c01ba18c + hl-color:: yellow + - Dynamically Linked Libraries + ls-type:: annotation + hl-page:: 152 + hl-color:: yellow + id:: 644292fb-c10c-43f6-8152-941b009e14c2 + - Library routines are not linked and loaded until the program is run. Keep extra info on the location and name of non-local procedures.
+ hl-page:: 152 + ls-type:: annotation + id:: 644295c2-95e3-4da8-ba89-5470c9049fce + hl-color:: yellow + - The *program loader* uses the extra information to find the proper libraries and ==update all external references==. + - Lazy procedure linkage: Instead of linking all library routines that might be called, link only those that are actually called at runtime. + - Assume there is a table of entries for external routines, at static linkage stage, set them all to a dummy address of a dynamic linker/loader. At runtime, the program jumps to this dummy address, and executes this linker/loader which finds the desired routine, remaps it and changes the address in the indirect jump location. Next time this routine is called, this indirect jump will go to the desired routine. +- A C Sort Example to Put It All Together + ls-type:: annotation + hl-page:: 155 + hl-color:: yellow + id:: 6442a0c7-e518-46f3-979d-9c104093343b + collapsed:: true + - Skipped, since it is easy +- Arrays versus Pointers + ls-type:: annotation + hl-page:: 164 + hl-color:: yellow + id:: 6442a0b8-5961-423d-bbcf-96cf55fd55cf + collapsed:: true + - An example piece of code that iterates over an array by both pointer and index + - Skipped, since it is easy +- Advanced Material: Compiling C and Interpreting Java + ls-type:: annotation + hl-page:: 168 + hl-color:: yellow + id:: 6442a09e-3edb-4593-833a-626506177900 + collapsed:: true + - Skipped, since it is compiler's job (Control Flow Graph???) +- Real Stuff: ARMv7 (32-bit) Instructions + ls-type:: annotation + hl-page:: 194 + hl-color:: yellow + id:: 6442a15d-dfe5-49b0-9b78-5f109e674e2f +- Real Stuff: x86 Instructions + ls-type:: annotation + hl-page:: 198 + hl-color:: yellow + id:: 6442a152-baff-4ccb-aee2-2f548e14903e +- Real Stuff: ARMv8 (64-bit) Instructions + ls-type:: annotation + hl-page:: 207 + hl-color:: yellow + id:: 6442a1ad-0491-48d7-84b7-7feba159d9dd + collapsed:: true + - The philosophy of ARMv8 is much closer to MIPS than to ARMv7.
For example, the `$zero`, the `beq/bne` instead of the condition bit +- Design Principles + collapsed:: true + - Design Principle 1: Simplicity favors regularity. + ls-type:: annotation + hl-page:: 88 + hl-color:: yellow + id:: 644152b5-ee31-4da9-86e4-33d7472f04c3 + - Design Principle 2: Smaller is faster. + ls-type:: annotation + hl-page:: 90 + hl-color:: yellow + id:: 644152aa-00c2-4542-abaf-7048e6d37904 + - Design Principle 3: Good design demands good compromises. + ls-type:: annotation + hl-page:: 106 + hl-color:: yellow + id:: 64415292-0727-4366-8717-ecca11267baf +- Word List 2 + collapsed:: true + - palatable 可口的;味美的 + ls-type:: annotation + hl-page:: 86 + hl-color:: green + id:: 64412a38-84ed-4f97-b83d-911772eb7158 + - rationale 基本原理;根本原因 reason + hl-page:: 86 + ls-type:: annotation + id:: 64412bbd-513c-4e00-8576-7ef88749e552 + hl-color:: green + - moot 无考虑意义的 + ls-type:: annotation + hl-page:: 99 + hl-color:: green + id:: 64414e6e-e6f7-4d05-865f-a18455c509ba + - dichotomy 二分法;两面性;(the separation between two opposite groups) + hl-page:: 117 + ls-type:: annotation + id:: 6441591a-02ed-4556-8fd7-5fdb310063e7 + hl-color:: green + - spill (使)洒出,泼出,溢出: + ls-type:: annotation + hl-page:: 121 + hl-color:: green + id:: 64416330-597c-4245-8d53-a5dc643ea05f + - wax and wane 月亮盈/亏 + hl-page:: 127 + ls-type:: annotation + id:: 64416671-9318-4296-9588-c0421c02cdd2 + hl-color:: green + - interpose 将…置于(二者)之间;插话 + hl-page:: 144 + ls-type:: annotation + id:: 64428502-8d37-4bad-a24f-6edfa9796740 + hl-color:: green + - succinct 简明的;言简意赅的 concise + hl-page:: 148 + ls-type:: annotation + id:: 64428c72-5e4f-4f33-a1ac-cadd4610f04a + hl-color:: green + - stitch 缝 + ls-type:: annotation + hl-page:: 149 + hl-color:: green + id:: 6442903b-ae86-4608-8d83-60771782b088 + - anatomy 解剖学 + ls-type:: annotation + hl-page:: 168 + hl-color:: green + id:: 64429b75-4433-4e39-8085-ad2e791dbf33 + - headstart 领先 + ls-type:: annotation + hl-page:: 207 + hl-color:: green + id:: 
6442a2ee-8c2e-4ed2-a67f-23db50972a71 + - toil (长时间)苦干,辛勤劳作 + hl-page:: 209 + ls-type:: annotation + id:: 6442a05d-69ba-4161-9ffa-bb305a17fcf1 + hl-color:: green +- # Arithmetic for Computers + hl-page:: 225 + ls-type:: annotation + id:: 6442a5d0-e073-4cd4-a6c6-1bb664ee952a + hl-color:: yellow +- Addition and Subtraction + ls-type:: annotation + hl-page:: 227 + hl-color:: yellow + id:: 64433f1c-c023-429e-88d0-46cad778477c + collapsed:: true + - Addition is to add digits bit by bit from right to left with carries passed to the left digit. + - Subtraction uses addition, negate the second operand before adding. + - Overflow + collapsed:: true + - The result cannot be represented with the hardware. + - ==No overflow== can occur when ==adding operands with different signs== or ==subtracting operands with the same sign==. + - Overflow occurs when adding 2 positives and the sum is negative, or vice versa; and when subtracting a negative from a positive and get a negative, or vice versa. + - For a software detection, you can use `xor` to detect sign difference. + - For overflow (carry) of unsigned numbers, though often ignored, use the inequation $(\text{MAXUINT})2^{32}-1 \lt A + B \rightarrow 2^{32}-1 -A \lt B \rightarrow \overline{A} \lt B$ + - 这里补充一下408的内容,说了3种判断方法(不过本质上一样的),设 $A + B = S$ + - 一位符号位,就是英文教材里面的方法,适合软件判断 (因为你没有进位信号也没有双符号位)$\text{OF} = A_sB_s\overline{S_s}+\overline{A_s}\overline{B_s}S_s$ + - 两位符号位,无非就是给MSB前面添2位罢了。计算结果的双符号位 $S_{s1}S_{s2}$ 有4种组合,分别表示无溢出和正负溢出,判断为 $\text{OF} = S_{s1}\oplus S_{s2}$ + - 符号位进位和最高位进位,$\text{OF} = C_{n} \oplus C_{n-1}$ + - In MIPS, `add/addi/sub` causes exceptions on overflow; while `addu/addiu/subu` does not cause exceptions on overflow. + - Since C ignores overflow, it always uses `*u` instructions. 
+ - saturating operation: When overflow, set the result to the MAX/MIN value rather than a modulo to 2^32 + hl-page:: 230 + ls-type:: annotation + id:: 644347f7-9833-4208-99f7-655f94b5a7b5 + hl-color:: yellow +- Multiplication + ls-type:: annotation + hl-page:: 232 + hl-color:: yellow + id:: 64434f2d-d63b-4840-96da-918f7a04cb97 + collapsed:: true + - Names of the operands: `product = multiplicand * multiplier` + - Observation + - n-bit *multiplicand* and m-bit *multiplier* result in a (m+n)-bit *product* (overflow) + - The manual multiplication method in essence is a ==shift-and-add== process. + - Sequential Version of the Multiplication Algorithm and Hardware + ls-type:: annotation + hl-page:: 233 + hl-color:: yellow + id:: 64435253-b5bf-4832-ad91-025cede3bafd + collapsed:: true + - Naive version + - Three registers, namely 64-bit multiplicand, 32-bit multiplier and 64-bit product. + - ![image.png](../assets/image_1682135277029_0.png){:height 223, :width 449} + - Pseudo code for the algorithm + ```C + uint64_t multiplicand = A; + uint32_t multiplier = B; + uint64_t product = 0; + for (int i = 0; i < 32; ++ i) { + if (multiplier & 0x1) product += multiplicand; // 1. test multiplier[0] and add to product + // else do nothing, or add 0 + multiplicand <<= 1; // left shift multiplicand + multiplier >>= 1; // right shift multiplier + } + ``` + - Though the textbook says that each iteration takes 3 clock cycles, I think all these can be done in 1 cycle (虽然时序会比较垃圾就是了). The following refined version no doubt needs only 1 cycle each iteration. 
+ - Refined version + - one 64-bit register for product which right-shifts once at a tick + - 31 cycles (the first partial product is already in product register by initialization, so save one addition from the original 32 iterations) + - ![image.png](../assets/image_1682136092546_0.png) + - Signed Multiplication + ls-type:: annotation + hl-page:: 236 + hl-color:: yellow + id:: 64435d8f-d70f-4968-be40-ddbf8ef5a19e + - The easiest solution is that, first convert all operands to positive and calculate the sign separately; after multiplication, convert the product to its correct sign. + - The *refined version* is ready to deal with signed multiplication by the following 2 steps: + - Enable sign extension on right shift of *product* register. + - Subtract rather than add on the last partial product. This operation originates from ((64414f1b-142c-4301-a5a2-6dc0ad3b102b)) + - Then we can get a 32-bit product in the lower word of the *product* register. + - Faster Multiplication + ls-type:: annotation + hl-page:: 236 + hl-color:: yellow + id:: 6443dd4d-c534-4911-9e81-3b4b0ef396d9 + collapsed:: true + - A balance between resource and speed + - FIGURE 3.7 Fast multiplication hardware. + hl-page:: 237 + ls-type:: annotation + id:: 6443e016-d955-4433-9956-46ad682890ae + hl-color:: yellow + collapsed:: true + - ![image.png](../assets/image_1682170089022_0.png) + - Only $\log_2(32) = 5$ levels of addition on the critical path. + - Unroll the loop into a tree-like structure of adders + - There are many other ways to implement a multiplier circuit, such as Array Multiplier using Carry-Save Addition, or pipeline it, or Booth.
+ - Principle of Booth algorithm + - The simplest Radix-2 Booth multiplier is based on such an observation (again): + $$ + A = A_{\text{n-1}}A_{\text{n-2}}\dots A_{\text{1}}A_{\text{0}} + \\ = - A_{n-1} \times 2^{n-1} + \sum_{i=0}^{n-2} A_{i}\times 2^{i} + \\ = - A_{n-1} \times 2^{n-1} + (2 - 1)\sum_{i=0}^{n-2} A_{i}\times 2^{i} + \\= (A_{n-2}- A_{n-1})\cdot 2^{n-1} + (A_{n-3}- A_{n-2})\cdot2^{n-2} + \cdots + (A_{0}- A_{1})\cdot2^{1} + (A_{-1}- A_{0})\cdot 2^{0} + $$ + - Here $A_{-1} = 0$. When $A_{i-1} = A_{i}$, the term is 0. Thus, Radix-2 Booth Algorithm examines the 2 LSBs and decides which operation to perform (shift (`00/11`) or add (`01`) or subtract (`10`)). + - Extending to Radix-4, the item looks like this: $(-2A_{2k+1}+A_{2k}+A_{2k-1})\times 2^{2k}$. And we will have a more complicated operation table since the algorithm examines 3 bits. + - Radix-4 Booth Algorithm halves the number of partial products, thus improving the performance. +- Division + ls-type:: annotation + hl-page:: 238 + hl-color:: yellow + id:: 6443e24f-a691-45aa-809c-2e01aca20e0b + collapsed:: true + - $\text{dividend} = \text{quotient} \times \text{divisor} + \text{remainder}, \text{divisor} \gt \text{remainder}$ + collapsed:: true + - As for signed division, watch out for the *remainder*. There may be more than one seemingly reasonable pair of (*quotient*, *remainder*). One general rule for this is that, *remainder* has the same sign as the *dividend*. + - A Division Algorithm and Hardware (Unsigned) + hl-page:: 238 + ls-type:: annotation + id:: 6443e2eb-9d81-471b-9033-0e325140b4f2 + hl-color:: yellow + collapsed:: true + - Naive version + - ![image.png](../assets/image_1682171249934_0.png) + - Pseudo code + ```C + void div(uint32_t A, uint32_t B) { + uint64_t Divisor = B << 32; + uint64_t Remainder = A; + uint32_t Quotient = 0; + for (int i = 0; i < 33; ++ i) { + Remainder = Remainder - Divisor; // 1. try subtract + if (Remainder >= 0) { + Quotient = (Quotient | 1) << 1; // 2.a.
suffice + } + else { + Quotient = (Quotient | 0) << 1; // 2.b. cannot subtract, restore + Remainder = Remainder + Divisor; + } + Divisor = Divisor >> 1; // 3. next bit + } + } + ``` + - Refined version + collapsed:: true + - ![image.png](../assets/image_1682173423614_0.png) + - Use less resource, only a 64-bit register is needed, which is `0 | Dividend` at initialization and `Remainder | Quotient` after 32 cycles (32 left shifts). + - A working SystemVerilog implement + ```verilog + module divider( + input logic clk, + input logic rst, + input logic en, + input logic [31:0] operandA, + input logic [31:0] operandB, + output logic operation_valid, + output logic busy, + output logic [63:0] result + ); // unsigned divider + parameter COUNT = 6'd32; + logic [5:0] count; + logic [31:0] divisor; + logic [31:0] alu_result; + logic restore; + logic [63:0] remainder; + always @(posedge clk, negedge rst) + if (!rst) divisor <= 32'b0; + else if (en && count == 0) divisor <= operandB; + always @(posedge clk, negedge rst) + if (!rst) remainder <= 64'b0; + else if (en && count == 0) remainder <= {32'b0, operandA}; + else if (busy) begin + if (restore) remainder <= remainder << 1; + else remainder <= {alu_result[30:0], remainder[31:0], 1'b1}; + end + always @(posedge clk, negedge rst) + if (!rst || !en) count <= 0; + else if (count == 0) count <= 1; + else if (count < COUNT) count <= count + 1; + else count <= 0; + assign operation_valid = (operandB == 32'b0 && en) ? 
1'b0 : 1'b1; + assign busy = en && count; + assign alu_result = remainder[63:32] - divisor; + assign restore = remainder[63:32] < divisor; + assign result = {alu_result[31:0], remainder[30:0], ~restore}; + // due to implementation issues, the remainder part will be over-shifted in the end + // here is a workaround + endmodule + ``` + - 无符号除法也可以用加减交替法(Non-restoring Division),一种简单的改进。国内的计组教材上讲的都是定点小数,如果需要做整数的话,需要把 Divisor 先左移 N 位。好像说,不恢复余数法,其实是 SRT 方法的一种特殊情况来着。感觉还是没怎么搞明白,这东西可以单独开一门课,不过无所谓了,反正题会做就行。 + - 我的评价是,看这个吧。COMPUTER ARITHMETIC : Algorithms and Hardware Designs + - 补码除法(爱来自408) + - 加减交替法:符号位和数值位一起参加运算(全部是补码),商符自然形成。 + - 先做一次加减法运算:若 Dividend 和 Divisor 同号,则相减;否则相加。 + - 然后重复N次:若 Remainder 和 Divisor 同号,商上1,左移 Remainder 并减 Divisor;否则 Quotient 上0,左移 Remainder 并加 Divisor + - 最后一步给 Quotient 恒置1 + - 不过说实在的,我没理解,手动算好像也不对劲,==不知道哪里出了问题==,过天再看看。这东西的设计还挺好玩的。 + - Faster Division + ls-type:: annotation + hl-page:: 243 + hl-color:: yellow + id:: 6444043e-8c74-474e-a4f0-b2011ebb9b10 + - Similar to multiplier, there are also many ways to build a divider. However, unlike multiplier, divider cannot use array-adder, since it cannot be known ahead whether the subtraction is available. There is a method based on lookup table and prediction, called *SRT division*. +- Floating Point + ls-type:: annotation + hl-page:: 245 + hl-color:: yellow + id:: 644410ff-f158-431d-ab11-22f32e63a6da + collapsed:: true + - Normalized number: a number in scientific notation without leading 0s. + - binary point: the point, but in base 2 + hl-page:: 245 + ls-type:: annotation + id:: 644494f0-4e7e-4356-bf49-5d7f61999a78 + hl-color:: yellow + - floating point normalized form: $1.xxxxxxxx_{\text{two}} \times 2^{yyyy}$. Since there is no leading 0s, the only bit to the left of the *binary point* is 1. 
+ - Floating-Point Representation + ls-type:: annotation + hl-page:: 246 + hl-color:: yellow + id:: 644495bf-6828-4439-a332-f5a9a176adaf + - A single-precision floating point has 32 bits, `1 | 8 | 23` bit(s) for the 3 components `s | exponent | fraction` + - A double-precision floating point has 64 bits, `1 | 11 | 52` bits for the 3 components. + - General form of floating-point numbers: $(-1)^{\text{s}} \times \text{F} \times 2^{\text{E}}$ + - overflow: the exponent is too large + hl-page:: 247 + ls-type:: annotation + id:: 6444979f-bc17-4cf6-9a68-0a7098db96b6 + hl-color:: yellow + - underflow: the ==negative== exponent is too large + hl-page:: 247 + ls-type:: annotation + id:: 644497a1-f169-4386-8e44-cf46566f6d47 + hl-color:: yellow + - **significand**: the 24-bit or 53-bit number comprised of the implicit leading 1 and the *fraction*. + - IEEE 754 encoding of floating-point numbers. + hl-page:: 248 + ls-type:: annotation + id:: 6444991d-e3c9-4161-863e-2c65db02a575 + hl-color:: yellow + - **Represent 0**: Since 0 has no leading 1, a reserved *exponent* `0` is there to represent the number. + - **Represent Infinity/NaN**: Two unusual cases are given another reserved *exponent* `255/2047`, representing *infinity* (`fraction = 0`) and *NaN* (`fraction != 0`) + - **Biased notation**: To simplify the sorting of floating-point numbers, the *exponent* field is designed to be an unsigned integer. But we also have to represent negative exponents, thus the *exponent* field is biased by `127/1023`. In other words, the real value of the exponent is the *exponent* field minus the bias. + - The real value of an IEEE-754 floating-point (==normalized==) number could be expressed as: $(-1)^{\text{s}}\times (1+\text{Fraction})\times 2^{\text{Exponent-Bias}}$ + - Ranges from $\pm 1.00\dots00_{\text{two}}\times 2^{-126}$ to $\pm 1.11\dots11_{\text{two}}\times 2^{+127}$ + - **de-normalized numbers**: The *exponent* field is `0`, but the actual exponent is `-126/-1022`.
And there is no implicit leading 1. This form can represent numbers as small as $0.00\dots01_{\text{two}} \times 2^{-126} = 1.0_{\text{two}}\times 2^{-149}$ + hl-page:: 271 + ls-type:: annotation + id:: 6444a660-b04c-4dd5-9f03-1df86f5feaf2 + hl-color:: yellow + collapsed:: true + - However, this prevents FPUs from getting faster, some architects raise exceptions for de-normalized IEEE-754 (they just don't implement such support) + - Floating-Point Addition + ls-type:: annotation + hl-page:: 252 + hl-color:: yellow + id:: 64449f20-2e2b-4aba-a152-fb5013cee9df + - FIGURE 3.14 Floating-point addition. + ls-type:: annotation + hl-page:: 254 + hl-color:: yellow + id:: 6444a190-55ce-4d72-b776-f65bd79c402e + - (1) Compare the exponents of the 2 numbers. Shift the smaller number to the right until its exponent would match the larger one + - (2) Add the significands + - (3) Normalize the sum, either `rsh` and `exp++` or `lsh` and `exp--` + - Check overflow/underflow + - (4) Round the significand + - Check if the result is normalized, in case rounding adds to the MSB. If not, go to (3). + - FIGURE 3.15 Block diagram of an arithmetic unit dedicated to floating-point addition. + ls-type:: annotation + hl-page:: 256 + hl-color:: yellow + id:: 6444a33a-1be3-46cf-88d7-bda594ff889b + - Floating-Point Multiplication + ls-type:: annotation + hl-page:: 255 + hl-color:: yellow + id:: 6444a351-91c2-4d55-b74d-e9b3bf0b1f3f + - FIGURE 3.16 Floating-point multiplication. + ls-type:: annotation + hl-page:: 258 + hl-color:: yellow + id:: 6444a815-7f89-4638-9bc0-a2539cf80a27 + - (1) Add the biased *exponents* of the 2 numbers (and subtract one bias since it is added twice) to get the new *exponent* field + - (2) Multiply the significands + - Different from addition, ==exponent alignment is not needed==. Directly multiply the significands.
+ - (3) Normalize and check over/underflow + - (4) Round the significand to the appropriate number of bits, and check normalized (or go to (3)) + - (5) Set the sign of the product + - Floating-Point Instructions in MIPS + ls-type:: annotation + hl-page:: 260 + hl-color:: yellow + id:: 6444a4ea-ad13-4a57-a881-cb7cbae7f65f + - Special instructions: arithmetic(single/double) `add.s/d`, comparison `c.eq.s/d`, branch `bc1t/bc1f`, data transfer `lwc1/swc1` + - Floating-point registers: `$f0` to `$f31`, each 32-bit. A double-precision register is actually an even-odd pair of single-precision registers (e.g., double register `$f2` = `{$f2, $f3}`) + - Accurate Arithmetic + ls-type:: annotation + hl-page:: 267 + hl-color:: yellow + id:: 6444ac21-f605-497c-b86e-4d36484f6e3a + - Keep 2 extra bits on the right during ==intermediate additions==, since hardware cannot hold infinite bits for intermediates. They are *guard* and *round*. + - **sticky bit**: a third bit which indicates whether there are any non-zero bits to the right of the *round bit* + - units in the last place (ulp): The number of bits in error in the LSBs (right-most bits) of the significand between the actual number and the rounded number. ==Measure of accuracy==. + hl-page:: 268 + ls-type:: annotation + id:: 6444af4d-4e28-41ab-97b9-a4442cbe9d9a + hl-color:: yellow + - IEEE 754 has 4 rounding modes: always round up (toward $+\infin$), always round down (toward $-\infin$), truncate, and round to nearest even.
+ hl-page:: 268 + ls-type:: annotation + id:: 6444b015-6430-4fce-b780-988a0caa63f5 + hl-color:: yellow +- Parallelism and Computer Arithmetic: Subword Parallelism + ls-type:: annotation + hl-page:: 271 + hl-color:: yellow + id:: 6444bd6c-1170-4f97-932b-af66246ff486 + collapsed:: true + - Many multimedia applications use 8-bit or 16-bit data units, thus the processor can perform simultaneous operations on short vectors of these smaller operands (which are stored in a single word-size register) + - subword parallelism, data level parallelism, SIMD +- Real Stuff: Streaming SIMD Extensions and Advanced Vector Extensions in x86 + ls-type:: annotation + hl-page:: 273 + hl-color:: yellow + id:: 6444bda7-e1ae-495b-86eb-5507878df5d8 + collapsed:: true + - multiple floating-point operands packed into a single 128-bit SSE2 register + hl-page:: 273 + ls-type:: annotation + id:: 6444bfc1-7314-4629-ae26-defc51e37a96 + hl-color:: yellow + - load and store multiple operands per instruction, perform arithmetic operations on multiple operands +- Going Faster: Subword Parallelism and Matrix Multiply + ls-type:: annotation + hl-page:: 274 + hl-color:: yellow + id:: 6444bdac-2bc0-490a-ac86-88c702b83c65 + collapsed:: true + - DGEMM: Double precision GEneral Matrix Multiply. A commonly used program for demonstration. + hl-page:: 274 + ls-type:: annotation + id:: 6444c0b0-7629-423c-b600-03286f22c6bd + hl-color:: yellow + - An interesting example for how to use SIMD to speedup matrix multiply. 
+ ```C + void dgemm(int n, double* A, double* B, double* C) { + for (int i = 0; i < n; ++i) + for (int j = 0; j < n; ++j) { + double cij = C[i+j*n]; /* cij = C[i][j] */ + for( int k = 0; k < n; k++ ) + cij += A[i+k*n] * B[k+j*n]; /* cij += A[i][k]*B[k][j] */ + C[i+j*n] = cij; /* C[i][j] = cij */ + } + } + + void dgemm_AVX(int n, double* A, double* B, double* C) { + for (int i = 0; i < n; i+=4) + for (int j = 0; j < n; j++) { + __m256d c0 = _mm256_load_pd(C+i+j*n); /* c0 = C[i][j] */ + for(int k = 0; k < n; k++) + c0 = _mm256_add_pd(c0, _mm256_mul_pd(_mm256_load_pd(A+i+k*n), _mm256_broadcast_sd(B+k+j*n))); + /* c0 += A[i][k]*B[k][j] */ + _mm256_store_pd(C+i+j*n, c0); /* C[i][j] = c0 */ + } + } + ``` +- Fallacies and Pitfalls + ls-type:: annotation + hl-page:: 278 + hl-color:: yellow + id:: 6444bdb2-6eb9-44b2-afcf-0a844265f162 + collapsed:: true + - Pitfall: ==Floating-point addition is not associative==. + ls-type:: annotation + hl-page:: 278 + hl-color:: yellow + id:: 6444c121-8df1-44b0-8077-5258bb7e907d + - Parallel execution strategies that work for integer data types ==NOT always work for floating-point== data types. + hl-page:: 279 + ls-type:: annotation + id:: 6444c1f0-95bb-4cb4-b82c-b68e52f8b53b + hl-color:: yellow + - Pitfall: The MIPS instruction add immediate unsigned (addiu) ==sign-extends== its 16-bit immediate field. 
+ ls-type:: annotation + hl-page:: 279 + hl-color:: yellow + id:: 6444c1c5-55d6-4ee4-a522-7775b374d047 +- Word List 3 + collapsed:: true + - quirk 怪异的性格(或行为);怪癖 + ls-type:: annotation + hl-page:: 227 + hl-color:: green + id:: 64433ef2-0591-499f-b056-b2664d81156e + - vex 使恼火;使烦恼;使忧虑 + hl-page:: 232 + ls-type:: annotation + id:: 64434f3c-f512-4f58-a6f2-05e7aa355ec6 + hl-color:: green + - vague 不明确的;含糊的;模糊的; + ls-type:: annotation + hl-page:: 267 + hl-color:: green + id:: 6444aca4-859a-4303-b126-5b4fb95801ea + - equitable 公正的,合理的 + hl-page:: 268 + ls-type:: annotation + id:: 6444ae4a-b788-41ca-bf5a-f3e65eae3f37 + hl-color:: green + - quandary 困惑;进退两难;困窘 - dilemma + hl-page:: 279 + ls-type:: annotation + id:: 6444c22c-5633-40b1-9a04-dc9ac66b5cbd + hl-color:: green + - glitch 小故障;小差错 + hl-page:: 281 + ls-type:: annotation + id:: 6444c19b-acea-4882-994b-06a1c34e86fb + hl-color:: green +- # The Processor + ls-type:: annotation + hl-page:: 291 + hl-color:: yellow + id:: 6444be95-7530-4e08-bdf8-c05ea9cc5b9d +- Introduction + ls-type:: annotation + hl-page:: 293 + hl-color:: yellow + id:: 6444e82b-27ec-4755-9dd9-e1d543ab66d3 + collapsed:: true + - A Basic MIPS Implementation + ls-type:: annotation + hl-page:: 293 + hl-color:: yellow + id:: 6444e833-44e0-4dd0-82f9-cdcdac95dc04 + - A subset of MIPS ISA: Memory-reference (`lw` `sw`), Arithmetic (`add` `sub` `and` `or` `slt`), and Branch (`beq` `j`) + - Several common steps: + - 1. Send the PC to memory and fetch the instruction + - 2. Read 1 or 2 registers, using the fields of the instruction + - 3. Except `j`, all instruction classes use the ALU after reading the registers, though for different purposes (arithmetic, address calculation, comparison) + - 4. After ALU, the actions required to complete various classes of instructions differ, such as load/store memory, write to register or change PC.
+ - FIGURE Abstract view of the MIPS subset's implementation + hl-page:: 295 + ls-type:: annotation + id:: 64477e93-0b18-4efa-83c0-2c8e71b90457 + hl-color:: yellow + - FIGURE Basic implementation with multiplexors/control. + hl-page:: 296 + ls-type:: annotation + id:: 64477f1f-1cb6-4a55-bfad-148b4a3fb467 + hl-color:: yellow + - **Multiplexor**: One destination may have multiple sources, and thus we need to select from these sources according to the type of the instruction. + - **Control unit**: accepts the instruction as input, and generates signals to control other functional units (e.g., ALU, Memory) and the multiplexors. +- Logic Design Conventions + ls-type:: annotation + hl-page:: 297 + hl-color:: yellow + id:: 64477d00-f17e-40fd-8178-6e21175025a0 + collapsed:: true + - Combinational elements and State elements + collapsed:: true + - For combinational, outputs depend only on the current inputs + hl-page:: 297 + ls-type:: annotation + id:: 6447833a-b83a-40a0-98fa-59b6fa78c15c + hl-color:: yellow + - State elements completely characterize the computer, which has (at least) 2 inputs and 1 output. The clock is used to determine when to write, and a state element can be read at any time. + hl-page:: 297 + ls-type:: annotation + id:: 644a2196-f210-49e3-bf47-5439713057aa + hl-color:: yellow + - Clocking Methodology + hl-page:: 298 + ls-type:: annotation + id:: 644a2259-9a11-4a2d-bbb8-bc71ef75b9ab + hl-color:: yellow + collapsed:: true + - Edge-triggered clocking: state elements are only updated on a clock edge. + - Combinational logic must have its inputs come from a set of state elements and its outputs written into a set of state elements. These inputs are values written in a previous cycle, while the outputs are values that can be used in a following clock cycle. 
+ hl-page:: 298 + ls-type:: annotation + id:: 644a24c1-45e7-4763-8284-25b7d875d2b6 + hl-color:: yellow +- Building a Datapath + ls-type:: annotation + hl-page:: 300 + hl-color:: yellow + id:: 644a22b7-e481-4b81-b97d-717955fa09f6 + collapsed:: true + - Program Counter and Instruction Memory + - PC register, Adder, Instruction memory's address input and data output + - FIGURE 4.6 + ls-type:: annotation + hl-page:: 302 + hl-color:: yellow + id:: 644a5b33-a765-4cfb-a990-55668110f7cc + - R-Format + - **register file**: Each register can be read/written by specifying the register number + hl-page:: 301 + ls-type:: annotation + id:: 644a5b95-b5ad-4568-b41c-783f59940a6c + hl-color:: yellow + - We need to read 2 registers and write 1 register, and this gives an intuition about the interface of the *register file*: 2 read addresses, 2 read outputs, and 1 write address, 1 write data, and an additional control signal `RegWrite` controlling whether to write. + - Write to *register file* is edge-triggered and with an explicit signal, while reads are combinational + - ALU: 2 32-bit inputs and a 32-bit result (as well as a 1-bit signal for zero flag). Additionally, there is a control signal `ALU Operation` + hl-page:: 301 + ls-type:: annotation + id:: 644a5d36-fabb-4719-831d-fbcfe797b925 + hl-color:: yellow + - FIGURE 4.7 + ls-type:: annotation + hl-page:: 302 + hl-color:: yellow + id:: 644a603d-78ee-4284-ab39-8ae5dfb19818 + - Memory Reference + hl-page:: 303 + ls-type:: annotation + id:: 644a610a-9f03-4213-a7f0-87a0d519909b + hl-color:: yellow + - Need register file and ALU to compute the target address + - Sign-Extend Unit: sign-extend the 16-bit immediate field in the instruction + hl-page:: 303 + ls-type:: annotation + id:: 644a6150-f971-4fb1-a2be-e2fd0d068e1c + hl-color:: yellow + - Data Memory: Besides the read address and read output, since it is writable, write address, write data and write control are needed as well. 
+ - FIGURE 4.8 + ls-type:: annotation + hl-page:: 304 + hl-color:: yellow + id:: 644a6212-7e80-4d72-9242-2e719b8a43d8 + - Branch + hl-page:: 303 + ls-type:: annotation + id:: 644a627d-01be-4bb7-b84d-c7b66c1a48a0 + hl-color:: yellow + - Comparison between 2 operand registers: re-use the ALU + - branch taken and not taken: Replace PC with, *branch target address* or incremented PC + - Compute branch target address: Sign-Extension and Adder + hl-page:: 303 + ls-type:: annotation + id:: 644a6329-781d-4c60-b194-ac5aafba89f6 + hl-color:: yellow + - Sign-extend the constant(offset) field of the instruction + - The relative base of this computation is `PC + 4` + - The offset field needs to be left-shifted by 2 + - FIGURE 4.9 + ls-type:: annotation + hl-page:: 305 + hl-color:: yellow + id:: 644a64b3-3c73-48cb-b5b6-428a3cf7e961 + - Creating a Single Datapath + hl-page:: 305 + ls-type:: annotation + id:: 644a62f9-c49b-4e03-9ef3-deafe6f9f37d + hl-color:: yellow + - execute all instructions in a Single clock cycle, so no datapath resource can be used more than once per instruction + hl-page:: 305 + ls-type:: annotation + id:: 644a6592-a327-499f-81ae-1b4a6e7be71a + hl-color:: yellow + - To ==share a datapath element== between two different instruction classes, we may need to allow multiple connections to the input of an element, using a ==multiplexor and control signal== to select among the multiple inputs. + hl-page:: 305 + ls-type:: annotation + id:: 644a6f81-bceb-4923-afcb-437fcbc26b91 + hl-color:: yellow + - Share an ALU for Memory-Reference and Arithmetic instructions + - Share the write-back path between `lw` and Arithmetic + - Share sign-extend unit between Branch and Memory + - A separate Add unit to calculate branch target + - FIGURE 4.11 The simple datapath for the core MIPS architecture combines the elements required by different instruction classes. 
+ ls-type:: annotation + hl-page:: 307 + hl-color:: yellow + id:: 644a6fb3-921e-4395-b132-f4be1a542268 +- A Simple Implementation Scheme + ls-type:: annotation + hl-page:: 308 + hl-color:: yellow + id:: 644a6306-3290-4abd-b506-7234340a9977 + collapsed:: true + - With the datapath construction above, we add a control function to complete the implementation. + - The ALU Control + ls-type:: annotation + hl-page:: 308 + hl-color:: yellow + id:: 644a7005-84e0-47ae-8f10-0bc02cd041f3 + - multiple levels of decoding + hl-page:: 309 + ls-type:: annotation + id:: 644a72c6-9a8f-407f-95df-031a6598d422 + hl-color:: yellow + - Main Control generates `ALUOp` which indicates the instruction class (Memory, Branch, Arithmetic). And the `ALUOp` together with `funct` field generate the actual signals to control ALU + - This technique leads to smaller control unit, which is potentially faster + - A truth table that maps Instructions to the ALU control input + - Designing the Main Control Unit + ls-type:: annotation + hl-page:: 310 + hl-color:: yellow + id:: 644a71a0-c19d-40aa-bef1-eefdb8b311df + - The input of this "function" is the 6-bit `OP` field of Instruction, and the outputs are the control signals, except for `ALUOp`(explained above) and `PCSrc` + - The `PCSrc` signal selects the next PC, which cannot be decided from the Instruction only. Comparison result of the 2 operands is needed in combination with the `OP` field to control the multiplexor connected with PC. + - Why a Single-Cycle Implementation Is Not Used Today + ls-type:: annotation + hl-page:: 320 + hl-color:: yellow + id:: 644a7930-5ef2-44e8-a468-9315b8cad591 + - We must assume that the clock cycle is equal to the ==worst-case delay== for all instructions, which violates the principle of ==making the common case fast==. 
+ hl-page:: 321 + ls-type:: annotation + id:: 644a795f-d067-4a11-ba2d-8e37d43e9b1d + hl-color:: yellow +- An Overview of Pipelining + ls-type:: annotation + hl-page:: 321 + hl-color:: yellow + id:: 644a7223-c882-446f-adf5-3b819386c6c2 + collapsed:: true + - Speedup from pipelining + - Under ideal conditions (e.g., the stages are perfectly balanced) and with a large number of instructions, the speed-up from pipelining is approximately equal to the ==number of pipe stages== + hl-page:: 324 + ls-type:: annotation + id:: 644a7f5b-f357-4235-b3cf-034877a5946d + hl-color:: yellow + - Pipelining improves performance by ==increasing instruction throughput==, as opposed to decreasing the execution time of an individual instruction, but instruction throughput is the important metric because ==real programs execute billions of instructions==. + hl-page:: 326 + ls-type:: annotation + id:: 644a817b-8d05-418d-829b-8952b2fa2f5c + hl-color:: yellow + - Designing Instruction Sets for Pipelining + ls-type:: annotation + hl-page:: 326 + hl-color:: yellow + id:: 644a8196-0384-44b9-b141-c2e78451cb7e + - Aligned instructions; Regular instruction formats; Restricted memory operands (only load/store); Aligned operands (data address) + - Pipeline Hazards + ls-type:: annotation + hl-page:: 326 + hl-color:: yellow + id:: 644a80a5-e321-4d67-a16a-98fd02db800c + - **hazards**: There are situations in pipelining when the ==next instruction cannot execute in the following clock cycle==. 
+ hl-page:: 326 + ls-type:: annotation + id:: 644a85c9-d4a3-40e1-abca-0bf71ae67004 + hl-color:: yellow + - Structural Hazard + hl-page:: 326 + ls-type:: annotation + id:: 644a8737-93a8-474b-8029-1eff257568e0 + hl-color:: yellow + - Hardware cannot support the combination of instructions (that we want to execute in the same clock cycle) + hl-page:: 326 + ls-type:: annotation + id:: 644a8932-4413-4859-866c-c008e57947bd + hl-color:: yellow + - Example: Assume that we do not have separate instruction and data memory, then a `lw` will monopolize the memory bus and thus prevent an instruction fetch in the same cycle (resulting in a bubble in the Fetch stage). + - Data Hazards + ls-type:: annotation + hl-page:: 327 + hl-color:: yellow + id:: 644a8a16-8e43-4d26-a968-8af915459447 + - The pipeline must be stalled because one step must wait for another to complete. More specifically, the dependence of one instruction on an earlier one that is still in the pipeline. + hl-page:: 327 + ls-type:: annotation + id:: 644a8a51-e14d-4e22-91d9-6f4382e713c2 + hl-color:: yellow + - Example: + ```assembly + add $s0, $t0, $t1 + sub $s1, $s0, $t3 + ; The add is in EX, and sub is in ID. + ; The result of add isn't yet written back to reg file($s0), + ; while sub needs that result + ``` + - Solution: **forwarding**(bypassing). Directly feed the missing result to the next instruction rather than wait for the result being written back to reg file. + hl-page:: 327 + ls-type:: annotation + id:: 644a8c19-dd9f-4025-9fc7-f7e6881f1ae6 + hl-color:: yellow + collapsed:: true + - Forwarding cannot solve all data hazards, e.g., in a load-use case, the following instruction has to wait for data being fetched from memory. + - pipeline stall + ls-type:: annotation + hl-page:: 329 + hl-color:: yellow + id:: 644a8e47-9fcf-4944-8036-1c831ec18918 + - Another solution: **re-ordering** the instructions. 
+ - Control(Branch) Hazards + hl-page:: 330 + ls-type:: annotation + id:: 644a8dc0-abb5-4c17-bece-30484d124bc7 + hl-color:: yellow + - Make a decision based on the results of one instruction while others are executing. + hl-page:: 330 + ls-type:: annotation + id:: 644a8f6f-732d-41f9-ab03-66062a7b157b + hl-color:: yellow + - The branch class instruction. The pipeline cannot know what the next instruction should be until the branch is resolved. + - In the case of the classical MIPS 5-stage pipeline, a branch leads to a 1-cycle stall (C0: Fetch branch; C1: Decode branch, and the ALU combinational circuit already resolved the branch; C2: Fetch new instruction according to the result of ALU, and this result is written to EX stage Flip-Flop). + - Solution: prediction + hl-page:: 332 + ls-type:: annotation + id:: 644a92cc-7301-491e-961a-64310ef0252b + hl-color:: yellow + - The simplest policy is to predict that each branch is not taken or taken. + - Dynamic hardware predictors make guesses depending on the behavior of each branch. For example, keep a history for each branch and use the recent past behavior to predict. + - When the prediction fails, the pipeline needs to neutralize the following instruction and restart the pipeline. + - Another solution: delayed decision + hl-page:: 333 + ls-type:: annotation + id:: 644a9505-08fd-47ae-b202-3f2f2534dd90 + hl-color:: yellow + - Place an instruction not affected by the branch immediately after the branch instruction, and always execute it. 
+- Pipelined Datapath and Control + ls-type:: annotation + hl-page:: 335 + hl-color:: yellow + id:: 644a8636-5eb3-45d2-ab01-a0acd29747b0 + - Five stages: IF, ID, EX, MEM, WB +- Word List 4 + collapsed:: true + - anatomy 解剖学 + ls-type:: annotation + hl-page:: 321 + hl-color:: green + id:: 644a7236-aa52-48a8-8f9a-faa5ca23fa99 + - filthy 肮脏的;淫秽的 + hl-page:: 330 + ls-type:: annotation + id:: 644a8f8f-6a6e-400a-bc18-c48d24b3ab6e + hl-color:: green + - stark 荒凉的(啥也没有);残酷的(残酷的现实); + hl-page:: 333 + ls-type:: annotation + id:: 644a935b-84fe-4103-8915-a1df599e9915 + hl-color:: green + - nonetheless 虽说如此 + hl-page:: 334 + ls-type:: annotation + id:: 644a95de-ca6f-4036-b50a-629008ebd1ad + hl-color:: green \ No newline at end of file diff --git a/logseq/custom.css b/logseq/custom.css index 2a7e5d8..28ccd12 100644 --- a/logseq/custom.css +++ b/logseq/custom.css @@ -113,8 +113,8 @@ html[data-theme="light"], /* {{ 1.2. Layout corrections */ /* Helps to not overlap content with tabs plugin */ #main-content-container { - margin-top: 1.9em; - padding-top: 1.9em !important; + margin-top: 1.0em; + padding-top: 1.0em !important; } /* Add some white space to the right. (This is because the actual start of the content on the left side is offset if we include the bullets and spacing.) */ @@ -353,7 +353,7 @@ html[data-theme="light"] .search-results-wrap a .rounded.border { padding: 0.3em 0; } .ls-block[level="1"] { - padding-top: 0.6em; + padding-top: 0.3em; } .block-children { padding: 0.16em 0; @@ -655,8 +655,8 @@ html[data-theme="light"] .cm-s-solarized.cm-s-dark .CodeMirror-gutters { /* { == 4. Plugin specific ==> */ /* {{ 4.1. 
Tabs Plugin */ #main-content-container { - margin-top: 1.9em; - padding-top: 1.9em !important; + margin-top: 1.0em; + padding-top: 1.0em !important; } html[data-theme="dark"] #logseq-tabs_lsp_main { filter: sepia(0) saturate(0.8) opacity(0.85) hue-rotate(360deg) grayscale(0.05) brightness(0.85) invert(0.04); diff --git a/pages/hls__HTMFS_Strong_Consistency_Comes_for_Free_with_1682647018871_0.md b/pages/hls__HTMFS_Strong_Consistency_Comes_for_Free_with_1682647018871_0.md index dbd8bbb..1f7147e 100644 --- a/pages/hls__HTMFS_Strong_Consistency_Comes_for_Free_with_1682647018871_0.md +++ b/pages/hls__HTMFS_Strong_Consistency_Comes_for_Free_with_1682647018871_0.md @@ -124,4 +124,117 @@ file-path:: ../assets/HTMFS_Strong_Consistency_Comes_for_Free_with_1682647018871 - 非常自然的想法:把一个操作,或者说trx,拆成小的 - 三种操作分类: - Read、Invisible write(内部的东西,比如块分配信息) 、Visible write(应用程序可见的) - - 只有 Visible Write 用 HTM 实现,Invisible 和 Read 通过别的机制来实现 \ No newline at end of file + - 只有 Visible Write 用 HTM 实现,Invisible 和 Read 通过别的机制来实现 + - (对于一整个FS操作)首先在 RTM 外面完成所需的 read 和 invisible,然后用 RTM 完成 visible,保证它们是 atomic 的。 + hl-page:: 5 + ls-type:: annotation + id:: 64533b91-7e2b-448b-bb4f-807cc7cb5a77 + hl-color:: yellow + - RTM外面的部分不保证并发安全,用 sequence count 来解决这个问题。 + hl-page:: 5 + ls-type:: annotation + id:: 64533cf1-c232-4630-8cf9-88cf4dbfe0c2 + hl-color:: yellow + - 这东西的原型是 Linux 里面的 `seqlock` 。基本原理可以看这个。 [SeqLock中文博客](http://www.wowotech.net/kernel_synchronization/seqlock.html)。 然后这里的机制大概就是把 writer lock 给换成了 RTM,虽然细节上不是很一样 + - 在读之前(RTM外)先记录 seqcount,然后进入 RTM 之后进行验证,需要保证在 RTM commit 之前这些 seqcount 都没变。如果有变化,就需要重新回到 RTM 外面的某一点重新读取 + - 当然如果是 accidental abort,那么只需要重新执行RTM就可以了(就算前面读的数据炸了,反正还是要验证一次的) + - 不过有个问题它没说,就是 invisible write,它没有RTM保证不会写冲突啊?不过也有可能,它会保证 invisible 不涉及临界区?? + - Discussion of concurrency correctness. 
+ ls-type:: annotation + hl-page:: 5 + hl-color:: yellow + id:: 6453427e-ce5a-4746-960b-45f9546e8b37 + - Read-Read显然不冲突 + - Write-Write 他说potential conflicting write 会被RTM保护起来,不过有可能会live lock掉 + - Write-Read 也就是如果在RTM里面写入并且commit之前读取,会导致RTM abort + - Read-Write 的情况就比较复杂了。作者给了个图,考虑了 T1 读,同时有T2写的例子 + - T2 在 T1 验证之前写:Valid fail 然后重新读。不过其实是有一个 overlap 的,图里面的第三个B点应该也会 RTM-abort + - T2 在 T1 验证之后、RTM commit 之前写:因为读取的数据在 RTM 的 read-set 里面,所以还是会 RTM abort +- 3.2 File Operations + ls-type:: annotation + hl-page:: 6 + hl-color:: yellow + id:: 64534050-8459-4719-866e-612b76560e00 + - 其实看 Figure 3 就行了 + - 3.2.1 Data Read + ls-type:: annotation + hl-page:: 6 + hl-color:: yellow + id:: 64534bc6-4bde-47c6-9fa2-3a085ff59c66 + - 前面也写了,用 seqcnt 机制来保证读一致性。众所周知,每个文件的inode里面都有block指针,这里给每个指针都配上了一个seqno,读的时候就用这些个东西 + - 3.2.2 Data Write + ls-type:: annotation + hl-page:: 6 + hl-color:: yellow + id:: 64534cee-e35f-4e07-a79d-9185e36b57a2 + - convert data updates to metadata updates that can be embedded in the RTM transactions. + ls-type:: annotation + hl-page:: 6 + hl-color:: yellow + id:: 645357b1-b9ab-456c-87b3-c1047ec5ce59 + - small writes,直接塞进RTM + hl-page:: 6 + ls-type:: annotation + id:: 64535855-08bc-484d-951e-6cf191d5729f + hl-color:: yellow + - 大量写入,先把数据写入PM,然后再更新元数据。这时候用的就是block指针了。具体:空间分配是用的DRAM数据;(存疑?然后 shadow page 写文件数据);最后 RTM 修改 FS 元数据,allocation 的修改这个时候才会被持久化。如果 trx 提交之前炸了,顶多就是写了点垃圾数据到空白位置罢了。 + - But the blocks may have leaked after a system crash. 没看懂,为啥会leak啊?你的 alloc 数据又没落盘。 + hl-stamp:: 1683184901505 + hl-page:: 6 + ls-type:: annotation + id:: 64535ce3-9927-4e22-b2ed-9c396c3b43f2 + hl-color:: red + - 不过看上去,它们这边是认为分配一个块是个双向的过程,不仅会在总的freelist里面有记录,block 里面也有一个记录表明它的分配情况? 
+ - 3.2.3 Allocation + ls-type:: annotation + hl-page:: 7 + hl-color:: yellow + id:: 64534d02-3d6b-4dd4-8d3f-e86767190f08 + - 首先把已经分配的块放进一个 temporal allocating list 里面 (大概会在 alloc 阶段持久化?),如果commit成功就不管它,如果出事了就把这个里面的块还回去 + hl-page:: 7 + ls-type:: annotation + id:: 64535d27-c876-4481-856e-8962e5ba9b16 + hl-color:: yellow + - 在释放块的时候,也会存在中途 crash 的问题。一般是先删掉reference 然后再释放块,如果没释放完,那这个块就leak了。还是有和前面类似的疑问,这东西不是 引用一删就已经整完了吗? + - the file system crashes after a reference to a block of data has been removed (when this block of memory has not yet been freed) + ls-type:: annotation + hl-page:: 7 + hl-color:: red + id:: 64535f97-b733-4522-a2d1-dbfc8837799e + hl-stamp:: 1683185566331 +- 3.3 Directory Operations + ls-type:: annotation + hl-page:: 7 + hl-color:: yellow + id:: 64534d0c-9fe2-4817-b510-5d2da7d55dbf + - 3.3.1 Path Walk + ls-type:: annotation + hl-page:: 7 + hl-color:: yellow + id:: 64534d0f-1882-4633-9cd4-fce9d411aeb9 + - 3.3.2 Directory Updates + ls-type:: annotation + hl-page:: 7 + hl-color:: yellow + id:: 64534d19-b941-4c04-a084-2d8a09d00883 +- 3.4 Other File Types + ls-type:: annotation + hl-page:: 8 + hl-color:: yellow + id:: 64534d23-1327-40ef-b58e-7c5a01559552 + - 就写了一个 symlink,但是这玩意就是其他操作的组合罢了,没啥特别的东西 +- 3.5 The Timestamps + ls-type:: annotation + hl-page:: 8 + hl-color:: yellow + id:: 64534d33-2bfe-4817-a5bd-0cc3534584d7 +- 3.6 The Special Case: Rename + ls-type:: annotation + hl-page:: 8 + hl-color:: yellow + id:: 64534d37-b14f-4943-92c0-7bb4823fbd50 +- 4 Implementation + ls-type:: annotation + hl-page:: 9 + hl-color:: yellow + id:: 64534d45-e498-427d-b4ef-74eaf56f7aae \ No newline at end of file