Compare commits


3 Commits

Author  SHA1  Message  Date
Parth Sareen  a0923cbdd0  cmd: ollama launch add placeholder text for selector (#13966)  2026-01-29 09:48:49 -08:00
Seokrin Taron Sung  f92e362b2e  cmd: capitalize Ollama in serve command help text (#13965)  2026-01-29 09:47:53 -08:00
Tincho  aa23d8ecd2  docs: update installation command for OpenCode CLI (#13971)  2026-01-29 09:47:02 -08:00
12 changed files with 24 additions and 384 deletions

View File

@@ -912,19 +912,6 @@ type UserResponse struct {
 	Plan string `json:"plan,omitempty"`
 }
 
-type UsageResponse struct {
-	// Start is the time the server started tracking usage (UTC, RFC 3339).
-	Start time.Time        `json:"start"`
-	Usage []ModelUsageData `json:"usage"`
-}
-
-type ModelUsageData struct {
-	Model            string `json:"model"`
-	Requests         int64  `json:"requests"`
-	PromptTokens     int64  `json:"prompt_tokens"`
-	CompletionTokens int64  `json:"completion_tokens"`
-}
-
 // Tensor describes the metadata for a given tensor.
 type Tensor struct {
 	Name string `json:"name"`

View File

@@ -1888,7 +1888,7 @@ func NewCLI() *cobra.Command {
 	serveCmd := &cobra.Command{
 		Use:     "serve",
 		Aliases: []string{"start"},
-		Short:   "Start ollama",
+		Short:   "Start Ollama",
 		Args:    cobra.ExactArgs(0),
 		RunE:    RunServer,
 	}

View File

@@ -275,7 +275,11 @@ func parseInput(r io.Reader) (inputEvent, byte, error) {
 
 func renderSelect(w io.Writer, prompt string, s *selectState) int {
 	filtered := s.filtered()
-	fmt.Fprintf(w, "%s %s\r\n", prompt, s.filter)
+	if s.filter == "" {
+		fmt.Fprintf(w, "%s %sType to filter...%s\r\n", prompt, ansiGray, ansiReset)
+	} else {
+		fmt.Fprintf(w, "%s %s\r\n", prompt, s.filter)
+	}
 	lineCount := 1
 
 	if len(filtered) == 0 {
@@ -314,7 +318,11 @@ func renderSelect(w io.Writer, prompt string, s *selectState) int {
 
 func renderMultiSelect(w io.Writer, prompt string, s *multiSelectState) int {
 	filtered := s.filtered()
-	fmt.Fprintf(w, "%s %s\r\n", prompt, s.filter)
+	if s.filter == "" {
+		fmt.Fprintf(w, "%s %sType to filter...%s\r\n", prompt, ansiGray, ansiReset)
+	} else {
+		fmt.Fprintf(w, "%s %s\r\n", prompt, s.filter)
+	}
 	lineCount := 1
 
 	if len(filtered) == 0 {

View File

@@ -15,7 +15,6 @@
 - [Push a Model](#push-a-model)
 - [Generate Embeddings](#generate-embeddings)
 - [List Running Models](#list-running-models)
-- [Usage](#usage)
 - [Version](#version)
 - [Experimental: Image Generation](#image-generation-experimental)
@@ -1855,53 +1854,6 @@ curl http://localhost:11434/api/embeddings -d '{
 }
 ```
 
-## Usage
-
-```
-GET /api/usage
-```
-
-Show aggregate usage statistics per model since the server started. All timestamps are UTC in RFC 3339 format.
-
-### Examples
-
-#### Request
-
-```shell
-curl http://localhost:11434/api/usage
-```
-
-#### Response
-
-```json
-{
-  "start": "2025-01-27T20:00:00Z",
-  "usage": [
-    {
-      "model": "llama3.2",
-      "requests": 5,
-      "prompt_tokens": 130,
-      "completion_tokens": 890
-    },
-    {
-      "model": "deepseek-r1",
-      "requests": 2,
-      "prompt_tokens": 48,
-      "completion_tokens": 312
-    }
-  ]
-}
-```
-
-#### Response fields
-
-- `start`: when the server started tracking usage (UTC, RFC 3339)
-- `usage`: list of per-model usage statistics
-  - `model`: model name
-  - `requests`: total number of completed requests
-  - `prompt_tokens`: total prompt tokens evaluated
-  - `completion_tokens`: total completion tokens generated
-
 ## Version
 
 ```

View File

@@ -9,7 +9,7 @@ OpenCode is an open-source AI coding assistant that runs in your terminal.
 Install the [OpenCode CLI](https://opencode.ai):
 
 ```bash
-curl -fsSL https://opencode.ai/install.sh | bash
+curl -fsSL https://opencode.ai/install | bash
 ```
 
 <Note>OpenCode requires a larger context window. It is recommended to use a context window of at least 64k tokens. See [Context length](/context-length) for more information.</Note>

View File

@@ -85,7 +85,6 @@ type Server struct {
 	addr    net.Addr
 	sched   *Scheduler
 	lowVRAM bool
-	usage   *UsageTracker
 }
 
 func init() {
@@ -274,10 +273,6 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 		c.Header("Content-Type", contentType)
 
 		fn := func(resp api.GenerateResponse) error {
-			if resp.Done {
-				s.usage.Record(origModel, resp.PromptEvalCount, resp.EvalCount)
-			}
-
 			resp.Model = origModel
 			resp.RemoteModel = m.Config.RemoteModel
 			resp.RemoteHost = m.Config.RemoteHost
@@ -584,8 +579,6 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 			}
 			res.Context = tokens
 		}
-
-		s.usage.Record(req.Model, cr.PromptEvalCount, cr.EvalCount)
 	}
 
 	if builtinParser != nil {
@@ -1597,8 +1590,6 @@ func (s *Server) GenerateRoutes(rc *ollama.Registry) (http.Handler, error) {
 	r.HEAD("/api/blobs/:digest", s.HeadBlobHandler)
 	r.POST("/api/copy", s.CopyHandler)
 
-	r.GET("/api/usage", s.UsageHandler)
-
 	// Inference
 	r.GET("/api/ps", s.PsHandler)
 	r.POST("/api/generate", s.GenerateHandler)
@@ -1667,7 +1658,7 @@ func Serve(ln net.Listener) error {
 		}
 	}
 
-	s := &Server{addr: ln.Addr(), usage: NewUsageTracker()}
+	s := &Server{addr: ln.Addr()}
 
 	var rc *ollama.Registry
 	if useClient2 {
@@ -1884,10 +1875,6 @@ func (s *Server) SignoutHandler(c *gin.Context) {
 	c.JSON(http.StatusOK, nil)
 }
 
-func (s *Server) UsageHandler(c *gin.Context) {
-	c.JSON(http.StatusOK, s.usage.Stats())
-}
-
 func (s *Server) PsHandler(c *gin.Context) {
 	models := []api.ProcessModelResponse{}
@@ -2046,10 +2033,6 @@ func (s *Server) ChatHandler(c *gin.Context) {
 		c.Header("Content-Type", contentType)
 
 		fn := func(resp api.ChatResponse) error {
-			if resp.Done {
-				s.usage.Record(origModel, resp.PromptEvalCount, resp.EvalCount)
-			}
-
 			resp.Model = origModel
 			resp.RemoteModel = m.Config.RemoteModel
 			resp.RemoteHost = m.Config.RemoteHost
@@ -2270,8 +2253,6 @@ func (s *Server) ChatHandler(c *gin.Context) {
 			res.DoneReason = r.DoneReason.String()
 			res.TotalDuration = time.Since(checkpointStart)
 			res.LoadDuration = checkpointLoaded.Sub(checkpointStart)
-
-			s.usage.Record(req.Model, r.PromptEvalCount, r.EvalCount)
 		}
 
 		if builtinParser != nil {

View File

@@ -29,7 +29,6 @@ func TestGenerateDebugRenderOnly(t *testing.T) {
 	}
 
 	s := Server{
-		usage: NewUsageTracker(),
 		sched: &Scheduler{
 			pendingReqCh:  make(chan *LlmRequest, 1),
 			finishedReqCh: make(chan *LlmRequest, 1),
@@ -223,7 +222,6 @@ func TestChatDebugRenderOnly(t *testing.T) {
 	}
 
 	s := Server{
-		usage: NewUsageTracker(),
 		sched: &Scheduler{
 			pendingReqCh:  make(chan *LlmRequest, 1),
 			finishedReqCh: make(chan *LlmRequest, 1),

View File

@@ -34,7 +34,6 @@ func TestGenerateWithBuiltinRenderer(t *testing.T) {
 	}
 
 	s := Server{
-		usage: NewUsageTracker(),
 		sched: &Scheduler{
 			pendingReqCh:  make(chan *LlmRequest, 1),
 			finishedReqCh: make(chan *LlmRequest, 1),
@@ -219,7 +218,6 @@ func TestGenerateWithDebugRenderOnly(t *testing.T) {
 	}
 
 	s := Server{
-		usage: NewUsageTracker(),
 		sched: &Scheduler{
 			pendingReqCh:  make(chan *LlmRequest, 1),
 			finishedReqCh: make(chan *LlmRequest, 1),

View File

@@ -88,39 +88,19 @@ func TestGenerateChatRemote(t *testing.T) {
 		if r.Method != http.MethodPost {
 			t.Errorf("Expected POST request, got %s", r.Method)
 		}
 
+		if r.URL.Path != "/api/chat" {
+			t.Errorf("Expected path '/api/chat', got %s", r.URL.Path)
+		}
+
 		w.WriteHeader(http.StatusOK)
 		w.Header().Set("Content-Type", "application/json")
 
-		switch r.URL.Path {
-		case "/api/chat":
-			resp := api.ChatResponse{
-				Model:      "test",
-				Done:       true,
-				DoneReason: "load",
-				Metrics: api.Metrics{
-					PromptEvalCount: 10,
-					EvalCount:       20,
-				},
-			}
-			if err := json.NewEncoder(w).Encode(&resp); err != nil {
-				t.Fatal(err)
-			}
-		case "/api/generate":
-			resp := api.GenerateResponse{
-				Model:      "test",
-				Done:       true,
-				DoneReason: "stop",
-				Metrics: api.Metrics{
-					PromptEvalCount: 5,
-					EvalCount:       15,
-				},
-			}
-			if err := json.NewEncoder(w).Encode(&resp); err != nil {
-				t.Fatal(err)
-			}
-		default:
-			t.Errorf("unexpected path %s", r.URL.Path)
-		}
+		resp := api.ChatResponse{
+			Model:      "test",
+			Done:       true,
+			DoneReason: "load",
+		}
+		if err := json.NewEncoder(w).Encode(&resp); err != nil {
+			t.Fatal(err)
+		}
 	}))
 	defer rs.Close()
@@ -131,7 +111,7 @@ func TestGenerateChatRemote(t *testing.T) {
 	}
 	t.Setenv("OLLAMA_REMOTES", p.Hostname())
 
-	s := Server{usage: NewUsageTracker()}
+	s := Server{}
 	w := createRequest(t, s.CreateHandler, api.CreateRequest{
 		Model:      "test-cloud",
 		RemoteHost: rs.URL,
@@ -179,61 +159,6 @@ func TestGenerateChatRemote(t *testing.T) {
 			t.Errorf("expected done reason load, got %s", actual.DoneReason)
 		}
 	})
-
-	t.Run("remote chat usage tracking", func(t *testing.T) {
-		stats := s.usage.Stats()
-		found := false
-		for _, m := range stats.Usage {
-			if m.Model == "test-cloud" {
-				found = true
-				if m.Requests != 1 {
-					t.Errorf("expected 1 request, got %d", m.Requests)
-				}
-				if m.PromptTokens != 10 {
-					t.Errorf("expected 10 prompt tokens, got %d", m.PromptTokens)
-				}
-				if m.CompletionTokens != 20 {
-					t.Errorf("expected 20 completion tokens, got %d", m.CompletionTokens)
-				}
-			}
-		}
-		if !found {
-			t.Error("expected usage entry for test-cloud")
-		}
-	})
-
-	t.Run("remote generate usage tracking", func(t *testing.T) {
-		// Reset the tracker for a clean test
-		s.usage = NewUsageTracker()
-
-		w := createRequest(t, s.GenerateHandler, api.GenerateRequest{
-			Model:  "test-cloud",
-			Prompt: "hello",
-		})
-		if w.Code != http.StatusOK {
-			t.Fatalf("expected status 200, got %d", w.Code)
-		}
-
-		stats := s.usage.Stats()
-		found := false
-		for _, m := range stats.Usage {
-			if m.Model == "test-cloud" {
-				found = true
-				if m.Requests != 1 {
-					t.Errorf("expected 1 request, got %d", m.Requests)
-				}
-				if m.PromptTokens != 5 {
-					t.Errorf("expected 5 prompt tokens, got %d", m.PromptTokens)
-				}
-				if m.CompletionTokens != 15 {
-					t.Errorf("expected 15 completion tokens, got %d", m.CompletionTokens)
-				}
-			}
-		}
-		if !found {
-			t.Error("expected usage entry for test-cloud")
-		}
-	})
 }
 
 func TestGenerateChat(t *testing.T) {
@@ -251,7 +176,6 @@ func TestGenerateChat(t *testing.T) {
 	}
 
 	s := Server{
-		usage: NewUsageTracker(),
 		sched: &Scheduler{
 			pendingReqCh:  make(chan *LlmRequest, 1),
 			finishedReqCh: make(chan *LlmRequest, 1),
@@ -968,7 +892,6 @@ func TestGenerate(t *testing.T) {
 	}
 
 	s := Server{
-		usage: NewUsageTracker(),
 		sched: &Scheduler{
 			pendingReqCh:  make(chan *LlmRequest, 1),
 			finishedReqCh: make(chan *LlmRequest, 1),
@@ -1453,7 +1376,6 @@ func TestGenerateLogprobs(t *testing.T) {
 	}
 
 	s := &Server{
-		usage: NewUsageTracker(),
 		sched: &Scheduler{
 			pendingReqCh:  make(chan *LlmRequest, 1),
 			finishedReqCh: make(chan *LlmRequest, 1),
@@ -1634,7 +1556,6 @@ func TestChatLogprobs(t *testing.T) {
 	}
 
 	s := &Server{
-		usage: NewUsageTracker(),
 		sched: &Scheduler{
 			pendingReqCh:  make(chan *LlmRequest, 1),
 			finishedReqCh: make(chan *LlmRequest, 1),
@@ -1745,7 +1666,6 @@ func TestChatWithPromptEndingInThinkTag(t *testing.T) {
 	}
 
 	s := &Server{
-		usage: NewUsageTracker(),
 		sched: &Scheduler{
 			pendingReqCh:  make(chan *LlmRequest, 1),
 			finishedReqCh: make(chan *LlmRequest, 1),
@@ -2192,7 +2112,6 @@ func TestGenerateUnload(t *testing.T) {
 	var loadFnCalled bool
 
 	s := Server{
-		usage: NewUsageTracker(),
 		sched: &Scheduler{
 			pendingReqCh:  make(chan *LlmRequest, 1),
 			finishedReqCh: make(chan *LlmRequest, 1),
@@ -2294,7 +2213,6 @@ func TestGenerateWithImages(t *testing.T) {
 	}
 
 	s := Server{
-		usage: NewUsageTracker(),
 		sched: &Scheduler{
 			pendingReqCh:  make(chan *LlmRequest, 1),
 			finishedReqCh: make(chan *LlmRequest, 1),
@@ -2452,7 +2370,6 @@ func TestImageGenerateStreamFalse(t *testing.T) {
 	opts := api.DefaultOptions()
 
 	s := Server{
-		usage: NewUsageTracker(),
 		sched: &Scheduler{
 			pendingReqCh:  make(chan *LlmRequest, 1),
 			finishedReqCh: make(chan *LlmRequest, 1),

View File

@@ -255,7 +255,6 @@ func TestChatHarmonyParserStreamingRealtime(t *testing.T) {
 	}
 
 	s := Server{
-		usage: NewUsageTracker(),
 		sched: &Scheduler{
 			pendingReqCh:  make(chan *LlmRequest, 1),
 			finishedReqCh: make(chan *LlmRequest, 1),
@@ -407,7 +406,6 @@ func TestChatHarmonyParserStreamingSimple(t *testing.T) {
 	}
 
 	s := Server{
-		usage: NewUsageTracker(),
 		sched: &Scheduler{
 			pendingReqCh:  make(chan *LlmRequest, 1),
 			finishedReqCh: make(chan *LlmRequest, 1),
@@ -590,7 +588,6 @@ func TestChatHarmonyParserStreaming(t *testing.T) {
 	}
 
 	s := Server{
-		usage: NewUsageTracker(),
 		sched: &Scheduler{
 			pendingReqCh:  make(chan *LlmRequest, 1),
 			finishedReqCh: make(chan *LlmRequest, 1),

View File

@@ -1,62 +0,0 @@
-package server
-
-import (
-	"sync"
-	"time"
-
-	"github.com/ollama/ollama/api"
-)
-
-type ModelUsage struct {
-	Requests         int64
-	PromptTokens     int64
-	CompletionTokens int64
-}
-
-type UsageTracker struct {
-	mu     sync.Mutex
-	start  time.Time
-	models map[string]*ModelUsage
-}
-
-func NewUsageTracker() *UsageTracker {
-	return &UsageTracker{
-		start:  time.Now().UTC(),
-		models: make(map[string]*ModelUsage),
-	}
-}
-
-func (u *UsageTracker) Record(model string, promptTokens, completionTokens int) {
-	u.mu.Lock()
-	defer u.mu.Unlock()
-
-	m, ok := u.models[model]
-	if !ok {
-		m = &ModelUsage{}
-		u.models[model] = m
-	}
-	m.Requests++
-	m.PromptTokens += int64(promptTokens)
-	m.CompletionTokens += int64(completionTokens)
-}
-
-func (u *UsageTracker) Stats() api.UsageResponse {
-	u.mu.Lock()
-	defer u.mu.Unlock()
-
-	byModel := make([]api.ModelUsageData, 0, len(u.models))
-	for model, usage := range u.models {
-		byModel = append(byModel, api.ModelUsageData{
-			Model:            model,
-			Requests:         usage.Requests,
-			PromptTokens:     usage.PromptTokens,
-			CompletionTokens: usage.CompletionTokens,
-		})
-	}
-
-	return api.UsageResponse{
-		Start: u.start,
-		Usage: byModel,
-	}
-}

View File

@@ -1,136 +0,0 @@
-package server
-
-import (
-	"encoding/json"
-	"net/http"
-	"net/http/httptest"
-	"sync"
-	"testing"
-
-	"github.com/gin-gonic/gin"
-
-	"github.com/ollama/ollama/api"
-)
-
-func TestUsageTrackerRecord(t *testing.T) {
-	tracker := NewUsageTracker()
-
-	tracker.Record("model-a", 10, 20)
-	tracker.Record("model-a", 5, 15)
-	tracker.Record("model-b", 100, 200)
-
-	stats := tracker.Stats()
-	if len(stats.Usage) != 2 {
-		t.Fatalf("expected 2 models, got %d", len(stats.Usage))
-	}
-
-	lookup := make(map[string]api.ModelUsageData)
-	for _, m := range stats.Usage {
-		lookup[m.Model] = m
-	}
-
-	a := lookup["model-a"]
-	if a.Requests != 2 {
-		t.Errorf("model-a requests: expected 2, got %d", a.Requests)
-	}
-	if a.PromptTokens != 15 {
-		t.Errorf("model-a prompt tokens: expected 15, got %d", a.PromptTokens)
-	}
-	if a.CompletionTokens != 35 {
-		t.Errorf("model-a completion tokens: expected 35, got %d", a.CompletionTokens)
-	}
-
-	b := lookup["model-b"]
-	if b.Requests != 1 {
-		t.Errorf("model-b requests: expected 1, got %d", b.Requests)
-	}
-	if b.PromptTokens != 100 {
-		t.Errorf("model-b prompt tokens: expected 100, got %d", b.PromptTokens)
-	}
-	if b.CompletionTokens != 200 {
-		t.Errorf("model-b completion tokens: expected 200, got %d", b.CompletionTokens)
-	}
-}
-
-func TestUsageTrackerConcurrent(t *testing.T) {
-	tracker := NewUsageTracker()
-
-	var wg sync.WaitGroup
-	for range 100 {
-		wg.Add(1)
-		go func() {
-			defer wg.Done()
-			tracker.Record("model-a", 1, 2)
-		}()
-	}
-	wg.Wait()
-
-	stats := tracker.Stats()
-	if len(stats.Usage) != 1 {
-		t.Fatalf("expected 1 model, got %d", len(stats.Usage))
-	}
-
-	m := stats.Usage[0]
-	if m.Requests != 100 {
-		t.Errorf("requests: expected 100, got %d", m.Requests)
-	}
-	if m.PromptTokens != 100 {
-		t.Errorf("prompt tokens: expected 100, got %d", m.PromptTokens)
-	}
-	if m.CompletionTokens != 200 {
-		t.Errorf("completion tokens: expected 200, got %d", m.CompletionTokens)
-	}
-}
-
-func TestUsageTrackerStart(t *testing.T) {
-	tracker := NewUsageTracker()
-	stats := tracker.Stats()
-	if stats.Start.IsZero() {
-		t.Error("expected non-zero start time")
-	}
-}
-
-func TestUsageHandler(t *testing.T) {
-	gin.SetMode(gin.TestMode)
-
-	s := &Server{
-		usage: NewUsageTracker(),
-	}
-	s.usage.Record("llama3", 50, 100)
-	s.usage.Record("llama3", 25, 50)
-
-	w := httptest.NewRecorder()
-	c, _ := gin.CreateTestContext(w)
-	c.Request = httptest.NewRequest(http.MethodGet, "/api/usage", nil)
-
-	s.UsageHandler(c)
-
-	if w.Code != http.StatusOK {
-		t.Fatalf("expected status 200, got %d", w.Code)
-	}
-
-	var resp api.UsageResponse
-	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
-		t.Fatalf("failed to unmarshal response: %v", err)
-	}
-
-	if len(resp.Usage) != 1 {
-		t.Fatalf("expected 1 model, got %d", len(resp.Usage))
-	}
-
-	m := resp.Usage[0]
-	if m.Model != "llama3" {
-		t.Errorf("expected model llama3, got %s", m.Model)
-	}
-	if m.Requests != 2 {
-		t.Errorf("expected 2 requests, got %d", m.Requests)
-	}
-	if m.PromptTokens != 75 {
-		t.Errorf("expected 75 prompt tokens, got %d", m.PromptTokens)
-	}
-	if m.CompletionTokens != 150 {
-		t.Errorf("expected 150 completion tokens, got %d", m.CompletionTokens)
-	}
-}