[{"data":1,"prerenderedAt":1842},["ShallowReactive",2],{"blog-internal-signoz":3,"related-internal-signoz":780},{"id":4,"title":5,"author":6,"body":7,"category":763,"date":764,"description":765,"draft":766,"extension":767,"image":768,"meta":769,"navigation":770,"path":771,"seo":772,"stem":773,"tags":774,"__hash__":779},"blog/blog/internal-signoz.md","架設公司內部使用的 SigNoz 服務的紀錄隨筆","Ting Zhang",{"type":8,"value":9,"toc":741},"minimark",[10,14,18,31,34,37,40,99,105,109,114,332,336,339,344,347,352,355,360,363,368,371,376,381,385,402,405,408,411,429,438,470,485,488,495,515,520,524,529,532,535,559,562,566,571,577,602,617,620,644,648,657,664,668,671,687,691,698,708,711],[11,12,13],"h2",{"id":13},"前言",[15,16,17],"p",{},"如果有分散式服務，目前可能遇到的問題：",[19,20,21,25,28],"ul",{},[22,23,24],"li",{},"有錯的時候需要各自看service log，沒有統一的 trace 可以串起來",[22,26,27],{},"效能瓶頸要靠「感覺」和「經驗」來猜",[22,29,30],{},"CloudWatch 的 console 分散在好幾個頁面，排查一個問題要在 Logs Insights、X-Ray、Metrics 之間跳來跳去",[15,32,33],{},"目標：建一套統一的 Observability 平台，讓 Metrics / Traces / Logs 都在同一個地方查看。",[11,35,36],{"id":36},"方案",[15,38,39],{},"我們支援兩種 Observability Backend，依客戶的需求和預算擇一部署：",[41,42,43,62],"table",{},[44,45,46],"thead",{},[47,48,49,53,56,59],"tr",{},[50,51,36],"th",{"align":52},"left",[50,54,55],{"align":52},"Collector",[50,57,58],{"align":52},"Backend",[50,60,61],{"align":52},"場景",[63,64,65,83],"tbody",{},[47,66,67,74,77,80],{},[68,69,70],"td",{"align":52},[71,72,73],"strong",{},"SigNoz",[68,75,76],{"align":52},"SigNoz OTel Collector (EKS)",[68,78,79],{"align":52},"SigNoz + ClickHouse",[68,81,82],{"align":52},"需要完整 APM + Trace 分析，預算可控",[47,84,85,90,93,96],{},[68,86,87],{"align":52},[71,88,89],{},"CloudWatch",[68,91,92],{"align":52},"AWS ADOT Collector (本地 Docker)",[68,94,95],{"align":52},"CloudWatch + X-Ray",[68,97,98],{"align":52},"已深度使用 AWS 生態系，不想額外維運",[100,101,102],"blockquote",{},[15,103,104],{},"關鍵設計：兩種方案都使用 OTLP HTTP (:4318)，切換只需改 OTEL_EXPORTER_OTLP_ENDPOINT 一個環境變數。應用端的程式碼完全不用改，未來要換 vendor 
也零成本。",[11,106,108],{"id":107},"cloudwatch-vs-signoz功能比較","CloudWatch vs SigNoz：功能比較",[100,110,111],{},[15,112,113],{},"AI gen 的, 一些常見功能是對的，但是比較深入的功能 (ex: Cloudwatch AI Operations) 沒特別驗證過",[41,115,116,128],{},[44,117,118],{},[47,119,120,123,126],{},[50,121,122],{},"功能領域",[50,124,125],{},"AWS CloudWatch",[50,127,73],{},[63,129,130,143,156,169,181,194,207,220,232,245,258,271,283,295,308,320],{},[47,131,132,137,140],{},[68,133,134],{},[71,135,136],{},"Traces",[68,138,139],{},"X-Ray（單次查詢限 6 小時）",[68,141,142],{},"Traces Explorer（任意時間範圍）",[47,144,145,150,153],{},[68,146,147],{},[71,148,149],{},"Metrics",[68,151,152],{},"CloudWatch Metrics",[68,154,155],{},"ClickHouse + PromQL",[47,157,158,163,166],{},[68,159,160],{},[71,161,162],{},"Logs",[68,164,165],{},"Logs Insights（專有語法）",[68,167,168],{},"Logs Explorer（ClickHouse SQL）",[47,170,171,176,179],{},[68,172,173],{},[71,174,175],{},"Service Map",[68,177,178],{},"Application Map",[68,180,175],{},[47,182,183,188,191],{},[68,184,185],{},[71,186,187],{},"Dashboard",[68,189,190],{},"超過 3 個需付費",[68,192,193],{},"無限免費",[47,195,196,201,204],{},[68,197,198],{},[71,199,200],{},"Alerts",[68,202,203],{},"CloudWatch Alarms",[68,205,206],{},"可對 Metrics/Traces/Logs 設警報",[47,208,209,214,217],{},[68,210,211],{},[71,212,213],{},"AI 根因分析",[68,215,216],{},"Investigations（生成式 AI）",[68,218,219],{},"無",[47,221,222,227,230],{},[68,223,224],{},[71,225,226],{},"GenAI 監控",[68,228,229],{},"Bedrock AgentCore 整合",[68,231,219],{},[47,233,234,239,242],{},[68,235,236],{},[71,237,238],{},"合成監控 / RUM",[68,240,241],{},"Synthetics Canaries + RUM",[68,243,244],{},"無（需外部工具）",[47,246,247,252,255],{},[68,248,249],{},[71,250,251],{},"Container",[68,253,254],{},"Container Insights（深度 ECS/EKS 整合）",[68,256,257],{},"透過 OTel Collector 收集",[47,259,260,265,268],{},[68,261,262],{},[71,263,264],{},"Lambda / DB",[68,266,267],{},"Lambda Insights / Database Insights",[68,269,270],{},"無原生支援",[47,272,273,278,281],{},[68,274,275],{},[71,276,277],{},"網路監控",[68,279,280],{},"Flow / 
Internet / Synthetic Monitors",[68,282,219],{},[47,284,285,290,293],{},[68,286,287],{},[71,288,289],{},"Log 異常偵測",[68,291,292],{},"ML 自動偵測",[68,294,219],{},[47,296,297,302,305],{},[68,298,299],{},[71,300,301],{},"例外監控",[68,303,304],{},"無原生功能",[68,306,307],{},"Exceptions Monitoring（自動從 trace 擷取）",[47,309,310,315,317],{},[68,311,312],{},[71,313,314],{},"Messaging Queue",[68,316,304],{},[68,318,319],{},"Kafka / Celery 監控",[47,321,322,327,329],{},[68,323,324],{},[71,325,326],{},"Log Pipeline",[68,328,219],{},[68,330,331],{},"支援解析、轉換、trace 關聯",[333,334,335],"h3",{"id":335},"選型邏輯",[15,337,338],{},"怎麼幫客戶選？在 APM 和分散式追蹤這個場景，SigNoz 明顯更適合：",[15,340,341],{},[71,342,343],{},"1. 統一介面 vs 四散的 Console",[15,345,346],{},"CloudWatch 排查一個問題，需要在 Logs Insights、Metrics console、X-Ray、Application Signals 之間跳轉，每個的查詢語法還不一樣。SigNoz 把 Metrics / Traces / Log 放在同一個介面，從延遲高的 trace 直接點進去看對應的 log，不需要切 console。",[15,348,349],{},[71,350,351],{},"2. X-Ray 的 6 小時查詢限制",[15,353,354],{},"X-Ray 雖然保留 30 天的追蹤資料，但 UI 上一次只能查 6 小時的範圍。想看一整天的 trace？要手動跑 4 次查詢再拼起來。SigNoz 沒有這個限制。",[15,356,357],{},[71,358,359],{},"3. OpenTelemetry 原生",[15,361,362],{},"SigNoz 直接基於 OpenTelemetry 打造，資料進來就能用。CloudWatch 需要透過 ADOT（AWS Distro for OpenTelemetry）做中間轉換，多了一層 mapping 就多了一層可能出問題的地方。",[15,364,365],{},[71,366,367],{},"4. 
成本結構",[15,369,370],{},"自建 SigNoz 只需要 EKS 上的計算資源，費用可控且可預測。CloudWatch 的計費項目多且分散：自訂指標、Dashboard、GetMetricData API 呼叫、Log 儲存和查詢、X-Ray trace 記錄⋯每一項都獨立計費，帳單很難預估。",[100,372,373],{},[15,374,375],{},"但如果客戶已經重度使用 AWS 生態系（Lambda、ECS、RDS 等），CloudWatch 的 Lambda Insights、Container Insights、Database Insights、AI Investigations 這些整合是 SigNoz 目前無法替代的。",[15,377,378],{},[71,379,380],{},"選型建議：以 APM + 分散式追蹤為主要需求 → SigNoz。以 AWS 基礎設施監控為主且不想額外維運 → CloudWatch。根據客戶的實際需求和預算，選擇其中一套。",[11,382,384],{"id":383},"signoz-部署-資源配置-驗證","SigNoz 部署 - 資源配置 & 驗證",[15,386,387,388,396,397,401],{},"SigNoz 用官方 Helm chart 部署到 EKS 上，具體的 YAML 和指令都在 ",[389,390,395],"a",{"href":391,"rel":392,"title":394},"https://github.com/%7Brepo_address%7D/tree/main/opentelemetry",[393],"nofollow","https://github.com/{repo_address}/tree/main/opentelemetry","repo"," 裡的 ",[398,399,400],"code",{},"signoz/"," 目錄，這邊只講幾個需要思考的設計決策。",[333,403,404],{"id":404},"驗證",[15,406,407],{},"這次 gc-host 上部署的 SigNoz 不使用 ip whitelist 限制存取",[15,409,410],{},"有兩個對外端點，目前透過以下方式驗證：",[15,412,413,421,424,425,428],{},[389,414,418],{"href":415,"rel":416,"title":417},"https://signoz.internal.xx.ai/",[393],"https://signoz.internal.xx.ai",[71,419,420],{},"UI",[71,422,423],{},"（"," ",[398,426,427],{},"signoz.internal.xx.ai","）— 直接使用 SigNoz 內建的 SSO，接 Google OAuth 登入。團隊成員用公司 Google 帳號登入即可，不需要額外管理帳號密碼。",[15,430,431,424,434,437],{},[71,432,433],{},"OTLP Collector（",[398,435,436],{},"signoz-otel.internal.xx.ai","）— 這是機器對機器的端點，不適合走 SSO。做了兩層防護：",[439,440,441,464],"ol",{},[22,442,443,446,447,450,451,454,455],{},[71,444,445],{},"Bearer Token 驗證"," — nginx ingress 的 ",[398,448,449],{},"configuration-snippet"," 檢查 ",[398,452,453],{},"Authorization"," header，不帶正確 token 直接回 401\n",[439,456,457],{},[22,458,459,460,463],{},"Token 放在 1Password 的 ",[398,461,462],{},"Internal Signoz Collector"," 內",[22,465,466,469],{},[71,467,468],{},"Rate Limiting"," — 100 req/s per IP，burst 500，防止意外或惡意的大量灌入",[15,471,472,473,476,477,480,481,484],{},"Token 存在 
",[398,474,475],{},".env","（gitignored），部署時透過 ",[398,478,479],{},"envsubst"," 注入到 ingress yaml。服務端透過 ",[398,482,483],{},"OTEL_EXPORTER_OTLP_HEADERS"," 環境變數帶入，不需要改任何程式碼。",[333,486,487],{"id":487},"資源配置策略",[15,489,490,491,494],{},"因為是內部 dev 環境，整體策略是",[71,492,493],{},"壓到最小夠用"," ：",[19,496,497,503,509],{},[22,498,499,502],{},[71,500,501],{},"ClickHouse"," ：1Gi request / 3Gi limit，20Gi gp3 磁碟",[22,504,505,508],{},[71,506,507],{},"TTL"," ：Traces 和 Metrics 都只保留 3 天",[22,510,511,514],{},[71,512,513],{},"關掉不需要的元件"," ：alertmanager、zookeeper 在 dev 環境沒有用",[100,516,517],{},[15,518,519],{},"所有元件都明確設了 resource requests 和 limits。不設 limits 的話，ClickHouse 非常容易吃光整個 node 的記憶體。",[333,521,523],{"id":522},"clickhouse-磁碟管理","ClickHouse 磁碟管理",[100,525,526],{},[15,527,528],{},"之前架設 Signoz 還在測試的時候有發現記憶體資源不足導致 Service CrashLoop, 目前猜這個解法可以解決，但還要擺一陣子看看",[15,530,531],{},"20Gi 的 EBS 搭配 3 天 TTL，理論上不會爆。但 ClickHouse 的 TTL 清理是惰性的（不會主動跑），如果突然灌入大量 trace，磁碟可能來不及釋放。",[15,533,534],{},"所以加了一個 K8s CronJob，每 6 小時檢查磁碟使用率：",[19,536,537,543,553],{},[22,538,539,542],{},[71,540,541],{},"\u003C 75%"," ：正常，不做任何事",[22,544,545,548,549,552],{},[71,546,547],{},">= 75%"," ：強制 ",[398,550,551],{},"OPTIMIZE TABLE ... 
FINAL","，觸發 TTL 清理",[22,554,555,558],{},[71,556,557],{},">= 90%"," ：緊急模式，直接 drop 最舊的 partition",[11,560,561],{"id":561},"應用如何接入",[333,563,565],{"id":564},"demo-apps","Demo Apps",[100,567,568],{},[15,569,570],{},"目前Signoz UI & collector 的憑證還在處理，好了的話就不用 skip ssl 驗證，目前先用個 workaround 的方法繞過去",[15,572,573,576],{},[389,574,395],{"href":391,"rel":575,"title":394},[393]," 裡有三個 demo 應用（Express、NestJS、FastAPI），涵蓋了幾種常見場景：",[19,578,579,582,585,588,599],{},[22,580,581],{},"一般 API 回應",[22,583,584],{},"慢回應模擬（500ms delay）",[22,586,587],{},"DB 查詢（SQLAlchemy）",[22,589,590,591,598],{},"外部 HTTP 呼叫（httpx → ",[389,592,595],{"href":593,"rel":594},"http://httpbin.org/",[393],[71,596,597],{},"httpbin.org"," ）",[22,600,601],{},"Error logging",[15,603,604,605,608,609,612,613,616],{},"用 ",[398,606,607],{},"make demo"," 一鍵啟動，",[398,610,611],{},"make demo-traffic"," 打流量，",[398,614,615],{},"make verify-signoz"," 驗證端到端是否通。",[333,618,619],{"id":619},"三個設計原則",[439,621,622,628,638],{},[22,623,624,627],{},[71,625,626],{},"盡量不寫 SDK wrapper"," — 直接使用官方 OpenTelemetry SDK，跟著社群走，減少維護負擔",[22,629,630,633,634,637],{},[71,631,632],{},"所有配置走環境變數"," — 程式碼裡零 hardcode，SDK 自動讀取 ",[398,635,636],{},"OTEL_*"," 環境變數",[22,639,640,643],{},[71,641,642],{},"tracing 初始化"," — 在任何其他 import / app 初始化之前，否則可能會漏掉早期的 span",[333,645,647],{"id":646},"共用-backend-如何分辨service","共用 backend 如何分辨service",[19,649,650],{},[22,651,652,653,656],{},"SigNoz 可以設定客製化的 filter 欄位，透過環境變數 ",[398,654,655],{},"OTEL_RESOURCE_ATTRIBUTES"," 來進行標注，UI上就會出現 filter",[15,658,659],{},[660,661],"img",{"alt":662,"src":663},"","/images/blog/internal-signoz/image.png",[333,665,667],{"id":666},"切換-backend-修改環境變數","切換 Backend = 修改環境變數",[15,669,670],{},"這是整個架構最核心的設計：不管客戶選哪套 backend，應用端的程式碼完全一樣，只需要改一個環境變數：",[19,672,673,680],{},[22,674,675,676,679],{},"選 SigNoz → ",[398,677,678],{},"OTEL_EXPORTER_OTLP_ENDPOINT=https://signoz-otel.internal.xx.ai","",[22,681,682,683,686],{},"選 CloudWatch → 
",[398,684,685],{},"OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318","（透過本地 ADOT Collector）",[333,688,690],{"id":689},"sampling-策略","Sampling 策略",[15,692,693,694,697],{},"所有服務統一使用 10% head-based sampling（",[398,695,696],{},"parentbased_traceidratio","）。",[15,699,604,700,703,704,707],{},[398,701,702],{},"parentbased"," 而不是純 ",[398,705,706],{},"traceidratio"," 是為了確保：如果上游服務決定要 sample 這個 trace，下游服務也會保留，不會斷掉整條追蹤鏈。",[11,709,710],{"id":710},"參考資料",[19,712,713,720,727,734],{},[22,714,715],{},[389,716,719],{"href":717,"rel":718,"title":717},"https://signoz.io/docs/introduction/",[393],"SigNoz Documentation",[22,721,722],{},[389,723,726],{"href":724,"rel":725,"title":724},"https://opentelemetry.io/docs/",[393],"OpenTelemetry Documentation",[22,728,729],{},[389,730,733],{"href":731,"rel":732,"title":731},"https://aws-otel.github.io/docs/introduction",[393],"AWS ADOT Documentation",[22,735,736],{},[389,737,740],{"href":738,"rel":739,"title":738},"https://github.com/SigNoz/charts",[393],"SigNoz Helm Chart",{"title":662,"searchDepth":742,"depth":742,"links":743},2,[744,745,746,750,755,762],{"id":13,"depth":742,"text":13},{"id":36,"depth":742,"text":36},{"id":107,"depth":742,"text":108,"children":747},[748],{"id":335,"depth":749,"text":335},3,{"id":383,"depth":742,"text":384,"children":751},[752,753,754],{"id":404,"depth":749,"text":404},{"id":487,"depth":749,"text":487},{"id":522,"depth":749,"text":523},{"id":561,"depth":742,"text":561,"children":756},[757,758,759,760,761],{"id":564,"depth":749,"text":565},{"id":619,"depth":749,"text":619},{"id":646,"depth":749,"text":647},{"id":666,"depth":749,"text":667},{"id":689,"depth":749,"text":690},{"id":710,"depth":742,"text":710},"技術隨筆","2026-03-10","記錄如何架設與部署 SigNoz 作為公司內部的 Observability 服務，適用於統一管理 Metrics、Traces 和 
Logs。",false,"md","/images/blog/internal-signoz/banner.png",{},true,"/blog/internal-signoz",{"title":5,"description":765},"blog/internal-signoz",[73,775,776,777,778],"Observability","APM","分散式追蹤","技術架設","q5GOiX3bLLFhl-rdyCBKycwJu2N4dS4Brco4tgUv2YU",[781,883,1498],{"id":782,"title":783,"author":6,"body":784,"category":872,"date":873,"description":874,"draft":766,"extension":767,"image":662,"meta":875,"navigation":770,"path":876,"seo":877,"stem":878,"tags":879,"__hash__":882},"blog/blog/2025-review.md","2025 Review",{"type":8,"value":785,"toc":870},[786,789,792,807,810,817,820,828,835,838,841,848,853,856,861],[15,787,788],{},"今天是我開始工作的第708天，今年順利從智電系統畢業，順利來到GC成爲比較正式一點的軟體工程師。",[15,790,791],{},"11月底Onboard到現在其實也一個月出頭而已，必須說這一個月的收穫是超乎我想像的，雖然我沒辦法說出具體的感覺，但我可以感受到我正身處一個“正確的環境”",[15,793,794,795,800,801,806],{},"今天同時看到我的兩位師傅 Alex & Danny 的",[389,796,799],{"href":797,"rel":798},"https://www.linkedin.com/feed/update/urn:li:activity:7409841197114028032/",[393],"文章1"," & ",[389,802,805],{"href":803,"rel":804},"https://chenghsuan.me/posts/imposter-syndrome",[393],"文章2","，都在談論冒牌者症候群，我其實一直都處在這樣的狀態，不過這讓我無時無刻都在思考我的不足之處並且補齊，以結果來說暫時是正向的，就先這樣子吧。",[15,808,809],{},"最後可以來一點明年的期許：",[100,811,812],{},[15,813,814],{},[71,815,816],{},"Mindset",[15,818,819],{},"主要是改進一些我自己很annoying我自己的點吧，例如",[19,821,822,825],{},[22,823,824],{},"停止懷疑任何會阻止我變得更好的行為，例如我會花過多的時間去確認我腦子裡冒出來的問題是否是\"不笨的問題\"，然後再花時間琢磨要怎麼問，期許我可以就直接問，被罵了再說",[22,826,827],{},"感覺自己還是有一些心防，例如會有：“這個人會不會其實覺得我很爛？“的念頭，我認為更好的方法還是把這個防備卸下比較好，被背刺再說",[100,829,830],{},[15,831,832],{},[71,833,834],{},"Skills",[15,836,837],{},"問題很大，但也不大，相信繼續待在這個環境下，就算我不想進步也很難",[15,839,840],{},"除了保持現有的幾個主線 (主業 & Moniit & 每週讀書會)，時間允許之下希望自己可以週更這個blog，不管是心態上或是把在GC寫的文章搬來都好，最好是用英文寫",[100,842,843],{},[15,844,845],{},[71,846,847],{},"Languages",[19,849,850],{},[22,851,852],{},"Conversational English",[15,854,855],{},"作為剛換工作的藉口我已經停差不多兩個月了，哈，這個必須要做到，是通往國外工作的必須門票",[19,857,858],{},[22,859,860],{},"Japanese at least N3",[15,862,863,864,869],{},"看起來很難，不過根據我的基礎加上一點運氣(?)，參考 
",[389,865,868],{"href":866,"rel":867},"https://zh.wikipedia.org/zh-tw/%E6%97%A5%E6%9C%AC%E8%AF%AD%E8%83%BD%E5%8A%9B%E6%B5%8B%E8%AF%95",[393],"維基百科: 漢字圈考生學習時數","，絕對是有可能的。",{"title":662,"searchDepth":742,"depth":742,"links":871},[],"生活","2025-12-26","今天WFH，午餐吃新媛煮的泡菜鍋，讚",{},"/blog/2025-review",{"title":783,"description":874},"blog/2025-review",[880,881],"Review","2025","a--I2MAWFPvTaWjOROqM2NxWOXW-vsLpx3tIeVCH4PA",{"id":884,"title":885,"author":6,"body":886,"category":1063,"date":1488,"description":1489,"draft":766,"extension":767,"image":662,"meta":1490,"navigation":770,"path":1491,"seo":1492,"stem":1493,"tags":1494,"__hash__":1497},"blog/blog/2025q3q4-career.md","2025 軟體工程師面試經驗",{"type":8,"value":887,"toc":1463},[888,890,893,896,899,902,916,919,933,936,944,947,950,954,957,960,971,974,978,981,984,987,991,994,997,1000,1004,1007,1010,1037,1040,1042,1045,1049,1053,1061,1064,1067,1069,1073,1076,1086,1089,1094,1110,1115,1126,1132,1134,1138,1141,1163,1166,1171,1174,1185,1190,1207,1212,1217,1220,1223,1225,1229,1232,1249,1252,1257,1262,1267,1272,1277,1282,1290,1295,1297,1301,1304,1317,1320,1326,1332,1337,1351,1353,1357,1360,1365,1368,1373,1407,1410,1413,1415,1418,1421,1424,1441,1443,1447],[11,889,13],{"id":13},[15,891,892],{},"嗨，大家好，我是Ting，是一名全端工程師。\n這篇文是寫給想要轉職工程師的你，希望透過我的經驗分享能對你有幫助。",[15,894,895],{},"我原本在智電系統工作，是一個新創能源公司，除了web相關開發以外，我也投入非常多時間在能源相關的業務邏輯研究與開發，但在工作一年半以後，我認為我對能源相關的業務邏輯已經沒有太大的興趣，工作內容也漸漸被甲方的客製化需求給淹沒，所以我決定離職。",[15,897,898],{},"做這個決定主要有以下考量",[333,900,901],{"id":901},"目標",[19,903,904,907,910,913],{},[22,905,906],{},"更明確的職涯目標",[22,908,909],{},"更高的薪資成長(或是說更穩定)",[22,911,912],{},"WFH或自由的工作時間",[22,914,915],{},"未來有規劃國外工作",[333,917,918],{"id":918},"離職的好處",[19,920,921,924,927,930],{},[22,922,923],{},"可以挑選公司，讓工作內容更集中在開發",[22,925,926],{},"學習新技術，使用一樣的時間來學習更多東西",[22,928,929],{},"作為通往國外工作的敲門磚，必須提升履歷價值，至少到一間有聽過的公司",[22,931,932],{},"更有機會WFH，自由安排自己的時間，原本需要耗費大量通勤時間",[333,934,935],{"id":935},"離職的壞處",[19,937,938,941],{},[22,939,940],{},"原公司是正在IPO階段，離職後需要放棄RSU，算是一筆不小的機會成本",[22,942,943]
,{},"離開舒適圈，其實原本的工作環境除了需要每日通勤以外，工作內容不太會有人管",[945,946],"hr",{},[11,948,949],{"id":949},"準備",[333,951,953],{"id":952},"_1-程人頻道ted-協助","1. 程人頻道Ted 協助",[15,955,956],{},"首先這篇文章必須 S/O 給程人頻道的Ted，可以說這個計劃沒有他的話就不會這麼順利。\n原本就有在聽Ted的Podcast，他也一直有在提供想要轉換跑道的工程師協助，剛好在我下定決心要轉職以後，\n他上一期的諮詢服務剛結束一陣子，在招下一期的工程師，我把我的履歷與動機整理一下以後提供，就這麼被選上了。",[15,958,959],{},"這個計劃主要包含每週一小時的諮詢時間直到轉職成功，內容主要有",[19,961,962,965,968],{},[22,963,964],{},"履歷健檢",[22,966,967],{},"模擬面試 (BQ / Leetcode / System Design)",[22,969,970],{},"檢視準備方向是否正確",[15,972,973],{},"其實Ted的協助不僅僅是單純轉職而已，我們幾乎花了一樣多的時間在討論與探索“我真正想要的職涯規劃”，透過他的經驗，與整個計劃的進行，我也更聚焦我的職涯目標。",[333,975,977],{"id":976},"_2-專案實作-moniit","2. 專案實作 - Moniit",[15,979,980],{},"延續上一點，除了每週一小時的諮詢以外，程人頻道也一直有在經營社群開發的項目，剛好他們近期規劃的資產管理App - Moniit正在開發早期階段，透過這個機會我也幾乎參與了這個產品從0開始的開發過程，",[15,982,983],{},"我主要參與了後端開發，第一次使用FastAPI作為主要框架，且實戰了System Design，如何根據實際狀況來取捨設計，怎麼樣算是好的設計，怎麼樣算是Over Design。",[15,985,986],{},"並且這個專案有非常完善的K8s設計，包括但不限於helm, hpa, IaC框架等，這些都是我第一次接觸，透過實戰我對於k8s以及gcp相關雲生態系有更深入的了解。",[333,988,990],{"id":989},"_3-leetcode","3. leetcode",[15,992,993],{},"我leetcode只寫了166題 (E:72/M:82/H:12)，主要參考Blind 75跟Neetcode 150，如果對該演算法不熟，我會先把Hello Algo相關的文章看完，然後練習，最後再找Neetcode的影片來複習以及參考他的解題思路，慢慢把整個解題思維培養起來。",[15,995,996],{},"但相較於大部分的工程師，我覺得我在Leetcode的投入相對較少，一部分是因為瞄準公司的是台商居多，而不是Fang等一線外商，另一部分則是我花了更多時間在實際開發專案來獲取實務經驗。",[15,998,999],{},"我認為台商的OA的題目好像沒有遇到真的很難的，以及面試過程也很少遇到演算法題目，基本上都是履歷相關的問答，但若是要以更高水準的公司為目標的話，我認爲還需要投入更多的時間與精力。",[333,1001,1003],{"id":1002},"_4-履歷","4. 
履歷",[15,1005,1006],{},"可以參考標準的Latex模板，基本上是分成Work Experience、Side Projects、Education、Technical Skills四個部分。",[15,1008,1009],{},"各個部分我認為的重點",[19,1011,1012,1020,1031,1034],{},[22,1013,1014,1015],{},"General\n",[19,1016,1017],{},[22,1018,1019],{},"根據職位寫公版的履歷，再根據投遞目標的JD來微調",[22,1021,1022,1023],{},"Work Experience\n",[19,1024,1025,1028],{},[22,1026,1027],{},"強調技術相關的經驗，並且盡量量化指標(ex: rps, qps, 節省多少時間, 節省多少成本等等)",[22,1029,1030],{},"若有PM相關經驗，我覺得是個加分",[22,1032,1033],{},"Education: 簡單即可，基本上只看畢業校系，除非你的專案或論文與JD高度相關，不然我感覺放了只是佔版面而已",[22,1035,1036],{},"Technical Skills: 盡量跟JD上提到的技術棧相同或高度相關",[15,1038,1039],{},"以現今的web相關工作來看，除了前後端與資料庫技術棧以外，Docker與K8s基本上一定會出現在JD的Preferred Skills中，面試中如果可以帶一下或是有被問到並且回答的不錯是個大加分，所以可以的話一定要補一下相關知識。",[945,1041],{},[11,1043,1044],{"id":1044},"面試",[333,1046,1048],{"id":1047},"_1-cardlytics-full-stack-engineer","1. Cardlytics Full Stack Engineer",[1050,1051,1052],"h4",{"id":1052},"時程",[19,1054,1055,1058],{},[22,1056,1057],{},"D+0: 投遞履歷",[22,1059,1060],{},"D+2: Quick chat with Manager",[1050,1062,1063],{"id":1063},"心得",[15,1065,1066],{},"這是我第一個獲得投遞履歷有後續的公司，但這間對我來說實在是越級打怪，需要的職位要求以及語言我都還無法勝任，但也很感謝Manager還是跟我聊了30分鐘，且知道我的狀況後給了我一些方向與建議，也不吝於分享Cardlytics的產品。",[945,1068],{},[333,1070,1072],{"id":1071},"_2-捷思科技-full-stack-engineer-偏前端","2. 
捷思科技 - Full Stack Engineer (偏前端)",[1050,1074,1052],{"id":1075},"時程-1",[19,1077,1078,1080,1083],{},[22,1079,1057],{},[22,1081,1082],{},"D+7: 現場上機測驗 + 面試",[22,1084,1085],{},"D+14: Reject",[1050,1087,1063],{"id":1088},"心得-1",[15,1090,1091],{},[71,1092,1093],{},"Pair Programming 題目：",[439,1095,1096,1099],{},[22,1097,1098],{},"寫一個翻牌互動介面 (HTML/CSS hover 翻轉效果)",[22,1100,1101,1102,1105,1106,1109],{},"不使用 ",[398,1103,1104],{},"Promise.all"," 實作一個 ",[398,1107,1108],{},"PromiseAllLike"," function",[15,1111,1112],{},[71,1113,1114],{},"Behavior Question：",[439,1116,1117,1120,1123],{},[22,1118,1119],{},"目前工作遇過最困難的難題以及如何解決",[22,1121,1122],{},"如果接手 co-workers 的程式很糟的話，會怎麼處理",[22,1124,1125],{},"若工作上遇到某個流程很冗長很麻煩的話，會怎麼處理",[15,1127,1128,1131],{},[71,1129,1130],{},"總結：","\n一間小公司，要找偏前端的，覺得我的技術棧並不符合，算是個面試經驗。",[945,1133],{},[333,1135,1137],{"id":1136},"_3-trend-micro","3. Trend Micro",[1050,1139,1052],{"id":1140},"時程-2",[19,1142,1143,1145,1148,1151,1154,1157,1160],{},[22,1144,1057],{},[22,1146,1147],{},"D+5: 收到 Online Assignment 邀請",[22,1149,1150],{},"D+11: 完成 Online Assignment",[22,1152,1153],{},"D+14: 一面",[22,1155,1156],{},"D+32: 收到二面邀請",[22,1158,1159],{},"D+39: 二面",[22,1161,1162],{},"D+46: 寄信詢問未回，無聲卡",[1050,1164,1063],{"id":1165},"心得-2",[15,1167,1168],{},[71,1169,1170],{},"一面內容：",[15,1172,1173],{},"自我介紹：",[19,1175,1176,1179,1182],{},[22,1177,1178],{},"學經歷",[22,1180,1181],{},"工作經驗",[22,1183,1184],{},"Side Project",[15,1186,1187],{},[71,1188,1189],{},"技術問題：",[19,1191,1192,1195,1198,1201,1204],{},[22,1193,1194],{},"RESTful API 用過哪些 Method? 
(GET, POST, PUT, DELETE)",[22,1196,1197],{},"前端或後端有無 test 相關經驗？",[22,1199,1200],{},"協作相關經驗",[22,1202,1203],{},"Skillset 應用經驗",[22,1205,1206],{},"開發遇到最大的困難",[15,1208,1209],{},[71,1210,1211],{},"二面：",[19,1213,1214],{},[22,1215,1216],{},"一樣是根據履歷問答居多，還有畫參與過的專案的架構圖",[1050,1218,1219],{"id":1219},"總結",[15,1221,1222],{},"隔那麼久是因為中間家人生病需要照顧，所以耽誤了一些時間，趨勢那邊也表示體諒，配合我改期。\n趨勢應該是我原本目標的其中一間，雖然薪水可能開不高，但是WFH政策以及公司地理位置非常加分。",[945,1224],{},[333,1226,1228],{"id":1227},"_4-gilacloud-backend-集雅科技","4. GilaCloud Backend / 集雅科技",[1050,1230,1052],{"id":1231},"時程-3",[19,1233,1234,1237,1240,1243,1246],{},[22,1235,1236],{},"D+0: 獵頭連繫",[22,1238,1239],{},"D+3: 確認投遞",[22,1241,1242],{},"D+8: 完成 OA",[22,1244,1245],{},"D+49: 一面",[22,1247,1248],{},"D+66: Reject",[1050,1250,1063],{"id":1251},"心得-3",[15,1253,1254],{},[71,1255,1256],{},"Coderbyte OA：",[19,1258,1259],{},[22,1260,1261],{},"難度約 Easy ~ Medium",[15,1263,1264],{},[71,1265,1266],{},"一面 (Online Interview)：",[15,1268,1269],{},[71,1270,1271],{},"技術主管提問：",[19,1273,1274],{},[22,1275,1276],{},"履歷相關題目（內容沒印象了）",[15,1278,1279],{},[71,1280,1281],{},"人資提問：",[19,1283,1284,1287],{},[22,1285,1286],{},"基本 BQ",[22,1288,1289],{},"詢問公司內是否有讀書會等進修機制，被洗臉",[15,1291,1292,1294],{},[71,1293,1130],{},"\n中間隔那麼久應該是因為我OA分數勉勉強強過關而已，所以等前面的人都面完了才輪的到我。\n技術主管雖然對能源領域不懂，但問的還蠻深入的，整體感覺不錯，但人資面試給我蠻扣分的印象。",[945,1296],{},[333,1298,1300],{"id":1299},"_5-crescendo-lab","5. 
Crescendo Lab",[1050,1302,1052],{"id":1303},"時程-4",[19,1305,1306,1308,1311,1314],{},[22,1307,1057],{},[22,1309,1310],{},"D+1: 收到一面邀請",[22,1312,1313],{},"D+7: 一面 (CTO / HR quick call)",[22,1315,1316],{},"D+10: Reject",[1050,1318,1319],{"id":1319},"面試過程",[15,1321,1322,1325],{},[71,1323,1324],{},"Tech Lead","\n主要詢問了我目前公司的業務內容與產品細節，以及在專案中遇到的挑戰和我的解決經驗。技術面則聚焦於過往處理大數據、API 效能優化（例如如何調整資料庫 index）、與太陽能管理系統相關的技術細節（如數據量、QPS、缺值補償等）。也有探討到專案部署、監控、以及是否運用 AI 工具提升效率等經驗，整體問題偏重於實務經驗及技術應用。",[15,1327,1328,1331],{},[71,1329,1330],{},"HR","\nHR 主要詢問了一些基本背景與求職動機，像是過去的經歷、為何想換工作、對公司的了解與興趣，以及對未來工作環境和待遇的期待，同時也有聊到目前是否有使用 AI 工具和如果錄取後的交接安排等。",[15,1333,1334],{},[71,1335,1336],{},"整體感覺：",[19,1338,1339,1342,1345,1348],{},[22,1340,1341],{},"遇到最困難/最挫折的工作經驗，可以再想好一點的故事",[22,1343,1344],{},"DB Indexing 相關知識需補齊",[22,1346,1347],{},"PostgreSQL 相關語法與知識需補齊",[22,1349,1350],{},"錄取機會不高，就算錄取，薪水應該也會蠻低",[945,1352],{},[333,1354,1356],{"id":1355},"_6-blockriver","6. BlockRiver",[1050,1358,1052],{"id":1359},"時程-5",[19,1361,1362],{},[22,1363,1364],{},"D+0: HR Quick Call",[1050,1366,1063],{"id":1367},"心得-4",[15,1369,1370],{},[71,1371,1372],{},"HR Quick Call 問題：",[439,1374,1375,1378],{},[22,1376,1377],{},"Introduce yourself",[22,1379,1380,1381],{},"Technique questions:\n",[19,1382,1383,1391,1399],{},[22,1384,1385,1386],{},"List 3 methods to communicate between two processes\n",[19,1387,1388],{},[22,1389,1390],{},"答案參考：Pipes、Shared Memory、Message Queues、Sockets、Signals、Redis (in-memory, web-based)",[22,1392,1393,1394],{},"List 6 data containers in STL\n",[19,1395,1396],{},[22,1397,1398],{},"參考：vector, list, deque, set, map, unordered_map 等",[22,1400,1401,1402],{},"Insert and find time complexity for set\n",[19,1403,1404],{},[22,1405,1406],{},"答案：O(log n), O(log n)（註：回答 O(1), O(1) 可能不正確，set 
是紅黑樹實作）",[1050,1408,1219],{"id":1409},"總結-1",[15,1411,1412],{},"全英文面試，但我英文講感覺不太順，HR應該是香港或是中國人，直接問我要不要說中文...\n感覺是透過HR快速篩選履歷，照著問題問，我想要請他多敘述一下問題(STL是啥真的沒聽過)都不理睬。",[945,1414],{},[11,1416,1219],{"id":1417},"總結-2",[15,1419,1420],{},"細算下來這半年以來我投了大概接近200個履歷（中間因為家人生病有暫停一個月），有後續的大概只佔不到10%，但一部分是因為我認為我需要累積面試經驗才有辦法在我真正想去的公司的面試中表現得更好，所以基本上職位技術棧有對到我就投了，不管薪資待遇等其他事項，若是你認為你的面試經驗已經充足，也可以只投你想去的公司就好。",[333,1422,1423],{"id":1423},"建議",[19,1425,1426,1429,1432,1435,1438],{},[22,1427,1428],{},"一定要嘗試將目前有參與過的專案，或是Side Projects嘗試繪製成架構圖，並且有辦法在面試的時候從0開始邊畫邊講解為何要選用這個，以及這個設計的優缺點或是取捨等。",[22,1430,1431],{},"一直投履歷就對了，累積面試過程的經驗我覺得是一條必經之路，可以檢視目前的準備是否充足，也可以調整自己在面試過程中的問答表現。",[22,1433,1434],{},"面試趣以及LinkedIn Premium都可以直接課，在密集投遞履歷的階段幫助很大。",[22,1436,1437],{},"從面試官的角度來思考他們想要怎麼樣特色的人，切記面試的時候需要給面試官“你是來提供價值，是來輸出的”的印象，若有機會的話可以自己延伸一些技術話題，\n只是單純問答的話蠻可惜的，因為一方面對方也是在看未來是否可以跟你順利的合作。",[22,1439,1440],{},"常見的BQ問題一定要先想好一個故事，不要現場遇到問題才想，通常會講到爆掉，或是不那麼適合的回覆。",[945,1442],{},[11,1444,1446],{"id":1445},"reference","Reference",[439,1448,1449,1456],{},[22,1450,1451],{},[389,1452,1455],{"href":1453,"rel":1454},"https://techporn.io/podcast/052c5980-7a04-4bd3-9976-24cf8aeecdc6",[393],"程人頻道EP263",[22,1457,1458],{},[389,1459,1462],{"href":1460,"rel":1461},"https://moniit.com/",[393],"資產管理App - 
Moniit官方網站",{"title":662,"searchDepth":742,"depth":742,"links":1464},[1465,1470,1476,1484,1487],{"id":13,"depth":742,"text":13,"children":1466},[1467,1468,1469],{"id":901,"depth":749,"text":901},{"id":918,"depth":749,"text":918},{"id":935,"depth":749,"text":935},{"id":949,"depth":742,"text":949,"children":1471},[1472,1473,1474,1475],{"id":952,"depth":749,"text":953},{"id":976,"depth":749,"text":977},{"id":989,"depth":749,"text":990},{"id":1002,"depth":749,"text":1003},{"id":1044,"depth":742,"text":1044,"children":1477},[1478,1479,1480,1481,1482,1483],{"id":1047,"depth":749,"text":1048},{"id":1071,"depth":749,"text":1072},{"id":1136,"depth":749,"text":1137},{"id":1227,"depth":749,"text":1228},{"id":1299,"depth":749,"text":1300},{"id":1355,"depth":749,"text":1356},{"id":1417,"depth":742,"text":1219,"children":1485},[1486],{"id":1423,"depth":749,"text":1423},{"id":1445,"depth":742,"text":1446},"2025-11-13","Software engineer interview experience in 2025.",{},"/blog/2025q3q4-career",{"title":885,"description":1489},"blog/2025q3q4-career",[1495,1496,881],"Interview","Career","dIQ4AxjCw-2ZyrTZoptA-22qc9EV3GQKUOWsMTIFHmE",{"id":1499,"title":1500,"author":6,"body":1501,"category":1825,"date":1826,"description":1827,"draft":766,"extension":767,"image":1828,"meta":1829,"navigation":770,"path":1830,"seo":1831,"stem":1832,"tags":1833,"__hash__":1841},"blog/blog/20260304-troublemaker.md","關於我一天在公司 AWS 開發帳號花掉 14000 鎂的那回事",{"type":8,"value":1502,"toc":1807},[1503,1510,1513,1517,1524,1527,1536,1545,1552,1555,1569,1572,1577,1580,1583,1590,1593,1596,1599,1603,1606,1610,1613,1618,1627,1639,1642,1650,1655,1658,1661,1666,1669,1676,1681,1685,1688,1699,1703,1706,1713,1720,1723,1726,1729,1732,1735,1738,1741,1744,1748,1751,1754,1760,1764,1767,1778,1782,1785,1792,1795,1798,1801,1804],[15,1504,1505,1506,1509],{},"身為工程師，你可能聽過同事不小心把 Production 資料庫砍了、或是忘記關 EC2 多燒了幾百塊。但你有聽過",[71,1507,1508],{},"用雲服務一天燒掉 14,000 
美金","的嗎？",[15,1511,1512],{},"沒錯，這件事發生在我身上...嗎？",[11,1514,1516],{"id":1515},"tldr","TL;DR",[15,1518,1519,1520,1523],{},"在公司開發帳號 survey AWS Bedrock AgentCore Policy 功能，試用了 Cedar Policy Generator。幾天後帳單突然出現一筆 ",[71,1521,1522],{},"$14,000+ USD 的單日費用","。經過 CloudTrail 排查確認是 Cedar Generator 觸發的異常計費，最終 AWS 承認是 Bedrock 端的計費 bug，修復並校正了帳單。",[11,1525,1526],{"id":1526},"事發經過",[15,1528,1529,1530,1535],{},"3/4 我因為看到 ",[389,1531,1534],{"href":1532,"rel":1533},"https://aws.amazon.com/bedrock/agentcore/",[393],"AgentCore"," 新 Feature - Policy 剛上線，所以需要 Survey 一下。",[15,1537,1538,1539,1544],{},"AgentCore Policy 讓你可以用 ",[389,1540,1543],{"href":1541,"rel":1542},"https://www.cedarpolicy.com/",[393],"Cedar"," 語言來定義 Agent 的授權策略，控制 Agent 能存取哪些工具和資源。Cedar 是 AWS 開源的授權語言，語法可讀性很高，設計上是要讓非工程師也能看得懂的那種。",[15,1546,1547,1548,1551],{},"而 AgentCore 很貼心地提供了一個 ",[71,1549,1550],{},"Natural Language Policy Generator","——你用自然語言描述你要的權限規則，它就幫你生成對應的 Cedar Policy。聽起來很 User-friendly 對吧？",[15,1553,1554],{},"我就照著文件走了一遍流程：",[439,1556,1557,1560,1563,1566],{},[22,1558,1559],{},"建了一個 Policy Engine",[22,1561,1562],{},"試了幾次 Cedar Policy Generator（用自然語言描述轉成 Cedar）",[22,1564,1565],{},"確認功能可以正常運作",[22,1567,1568],{},"收工，覺得這功能還不錯",[15,1570,1571],{},"整個過程大概就一兩個小時，正常的 survey 流程。",[15,1573,1574],{},[71,1575,1576],{},"然後就沒有然後了——直到五天後。",[11,1578,1579],{"id":1579},"帳單爆炸",[15,1581,1582],{},"後來請假看完中華隊在 WBC 的比賽，打贏韓國那場實在很感動，回來上班的第一天，3/9 下午，Slack突然被狂 tag，我被我們公司的 Billing Manager & Team Lead 問為何會有一筆費用產生，我也趕緊地打開CE，一個讓我愣住的數字：",[100,1584,1585],{},[15,1586,1587],{},[71,1588,1589],{},"3/4 單日Agentcore產生了 $14,000+ USD的費用",[15,1591,1592],{},"我先是以為自己眼花了。重新整理頁面，數字還在那裡。",[15,1594,1595],{},"完了... 
哪裡搞錯了吧？",[11,1597,1598],{"id":1598},"排查過程",[333,1600,1602],{"id":1601},"step-1先開-support-ticket","Step 1：先開 Support Ticket",[15,1604,1605],{},"不管三七二十一，先開 AWS Support Ticket 回報異常帳單。把時間範圍、帳號資訊、異常金額都附上去，讓 Support 那邊開始調查。",[333,1607,1609],{"id":1608},"step-2自己也同步排查","Step 2：自己也同步排查",[15,1611,1612],{},"等 Support 回覆的同時，我也開始自己排查紀錄。",[15,1614,1615],{},[71,1616,1617],{},"確認 AgentCore Policy 的收費機制",[15,1619,1620,1621,1626],{},"根據 ",[389,1622,1625],{"href":1623,"rel":1624},"https://aws.amazon.com/bedrock/agentcore/pricing/",[393],"AgentCore 定價頁面","，Policy 的計費主要是：",[19,1628,1629,1636],{},[22,1630,1631,1632,1635],{},"Cedar Policy Generator：",[71,1633,1634],{},"按 input token 數量計費","（每 1,000 tokens）",[22,1637,1638],{},"Policy Engine 的 Authorization 請求：按請求數計費",[15,1640,1641],{},"稍微思考一下：",[439,1643,1644,1647],{},[22,1645,1646],{},"我當天的使用量根本不可能撐到這個金額。就算我瘋狂打 Generator 也打不出 $14,000，同事跟我說 Billing 那邊顯示 Policy 使用了 109 1M Tokens (接近 1.1 億，我甚至不知道一天要怎麼用掉那麼多 Tokens...)。",[22,1648,1649],{},"我的 Policy Engine 並沒有 Attach Gateway，所以也沒有真正的使用 Policy 這個 Feature。",[15,1651,1652],{},[71,1653,1654],{},"確認 Runtime 沒有產生額外費用",[15,1656,1657],{},"AgentCore Runtime 是按 CPU 和記憶體的秒級消耗來計費的。我需要確保沒有遺留的 Runtime 在背景持續運作，或者跟 Gateway 掛鉤產生連鎖費用。",[15,1659,1660],{},"檢查結果：沒有任何遺留資源在跑。",[15,1662,1663],{},[71,1664,1665],{},"CloudTrail",[15,1667,1668],{},"我拉出了那段時間的 CloudTrail 紀錄，逐筆檢查跟 AgentCore 相關的 API 呼叫。",[15,1670,1671,1672,1675],{},"最終鎖定是 ",[398,1673,1674],{},"StartPolicyGeneration"," 這個 API 呼叫。從紀錄上看，我確實只有呼叫了3次，請求量完全不合理對應到那個帳單金額。",[15,1677,1678],{},[71,1679,1680],{},"結論：應該不是我的使用量有問題，是計費那邊有問題。",[333,1682,1684],{"id":1683},"step-3跟-support-同步","Step 3：跟 Support 同步",[15,1686,1687],{},"我把 CloudTrail 的排查結果整理好，回覆到 Support Ticket 上。附上了：",[19,1689,1690,1693,1696],{},[22,1691,1692],{},"明確的 API 呼叫時間和次數",[22,1694,1695],{},"計費金額的不合理性說明",[22,1697,1698],{},"我這邊已經確認沒有遺留資源",[333,1700,1702],{"id":1701},"step-4aws-確認是計費-bug","Step 4：AWS 確認是計費 Bug",[15,1704,1705],{},"AWS Support 將 case 轉給了 Bedrock 團隊。Bedrock 
團隊調查後確認：",[100,1707,1708],{},[15,1709,1710],{},[71,1711,1712],{},"Cedar Policy Generator 存在計費問題，導致實際計費金額遠超正常使用量應有的費用。",[15,1714,1715,1716,1719],{},"他們修復了這個計費 bug，並且",[71,1717,1718],{},"校正了帳單","。",[15,1721,1722],{},"結案。",[11,1724,1725],{"id":1725},"心理狀態",[15,1727,1728],{},"看到 $14,000 這個數字的時候，腦袋裡跑過各種最壞的劇本——會不會要自己賠？這是我好幾個月的薪水總和，光想就覺得可怕。",[15,1730,1731],{},"即使理性上告訴自己先查原因再說，甚至基本上確定不是我這邊的問題，但焦慮感還是壓不住。",[15,1733,1734],{},"這幾天嚴重睡眠不足，每天躺在床上腦袋還在轉「到底是哪裡出問題」。腸胃也跟著出狀況，一直拉肚子。壓力對身體的影響比我想像中來得直接。",[15,1736,1737],{},"最後當 CloudTrail 的證據越來越明確指向計費問題，Support 也確認 Bedrock 團隊發現問題並正在進行修復，我的心情才開始慢慢平復。",[11,1739,1740],{"id":1740},"事後反思",[15,1742,1743],{},"雖然這次最後證實是 AWS 的計費 bug，但整個事件讓我學到了很多。",[333,1745,1747],{"id":1746},"_1-工作紀錄真的很重要","1. 工作紀錄真的很重要",[15,1749,1750],{},"當下會感到慌張的原因，很大一部分來自 Context 不足，一來我不確定 Agentcore Policy 是如何計費，為何可以用到那麼貴？二來我不知道我5天前具體做了什麼操作。",[15,1752,1753],{},"如果我當初 survey 的時候沒有留下操作紀錄，排查的時候會更加困難。CloudTrail 能幫你查到 API 呼叫，但你自己當時在做什麼、為什麼做，這些 context 只有你自己知道。",[15,1755,1756,1759],{},[71,1757,1758],{},"養成習慣：每次操作不熟的雲服務時，簡單記錄一下你做了什麼。"," 不需要多詳細，一個簡單的筆記或是 Slack 訊息就夠了。關鍵時刻這些紀錄可以救你一命。",[333,1761,1763],{"id":1762},"_2-使用前搞懂計費方式","2. 使用前搞懂計費方式",[15,1765,1766],{},"這聽起來像廢話，但真的很多人（包括我）在 survey 新服務的時候會直接跳進去玩，不會先仔細看定價頁面。",[19,1768,1769,1772,1775],{},[22,1770,1771],{},"開始 survey 前，先看過定價頁面",[22,1773,1774],{},"特別注意按量計費的服務，搞清楚「量」是怎麼定義的",[22,1776,1777],{},"開發帳號最好設定 Budget Alert，超過閾值自動通知",[333,1779,1781],{"id":1780},"_3-先了解公司的處理流程","3. 
先了解公司的處理流程",[15,1783,1784],{},"事發的時候，我其實不太確定公司對這種事情的態度和處理方式。後來跟主管報告時，主管第一句話就是：",[100,1786,1787],{},[15,1788,1789],{},[71,1790,1791],{},"「不可能叫你賠的，先搞清楚狀況就好。」",[15,1793,1794],{},"這句話讓我放下了很大的心理負擔。每間公司的文化不同，但我想大部分公司都不會因為合理操作導致的意外費用而讓員工賠償。",[11,1796,1797],{"id":1797},"結語",[15,1799,1800],{},"回頭看這件事，蠻慶幸最後不是我的問題，也慶幸公司的主管很 Nice",[15,1802,1803],{},"這次經歷讓我建立起了面對雲端成本異常的 SOP。以前覺得帳單管理是 FinOps 團隊的事，現在覺得每個會碰到雲服務的工程師都應該有基本的成本意識。",[15,1805,1806],{},"題外話，這讓我想到我大學時期第一次買股票的時候，我買了3股台積電花了大概 2000塊吧，當時每天看著損益正負三四百塊就能很影響我的心情，影響我做事與學習的效率，直到今天可能每天的損益就是一個月的薪水，我還是照常做著自己的事情。我想有了這次經驗，肯定能讓我在未來遇到類似的事情的時候心態更穩健吧。",{"title":662,"searchDepth":742,"depth":742,"links":1808},[1809,1810,1811,1812,1818,1819,1824],{"id":1515,"depth":742,"text":1516},{"id":1526,"depth":742,"text":1526},{"id":1579,"depth":742,"text":1579},{"id":1598,"depth":742,"text":1598,"children":1813},[1814,1815,1816,1817],{"id":1601,"depth":749,"text":1602},{"id":1608,"depth":749,"text":1609},{"id":1683,"depth":749,"text":1684},{"id":1701,"depth":749,"text":1702},{"id":1725,"depth":742,"text":1725},{"id":1740,"depth":742,"text":1740,"children":1820},[1821,1822,1823],{"id":1746,"depth":749,"text":1747},{"id":1762,"depth":749,"text":1763},{"id":1780,"depth":749,"text":1781},{"id":1797,"depth":742,"text":1797},"技術","2026-03-18","Survey AWS AgentCore Policy完，過了幾天以後才發現帳單噴了14000...","/images/blog/20260304-troublemaker/banner.png",{},"/blog/20260304-troublemaker",{"title":1500,"description":1827},"blog/20260304-troublemaker",[1834,1835,1836,1837,1838,1839,1840],"AWS","雲端","成本控管","經驗分享","踩雷","DevOps","雲服務","6QyxVCpq8RjfovhyvoS0butlFVU7bQKJ1pfTGyL4M1Q",1774237782423]